/usr/bin/tv_grab_uk

#!/usr/bin/perl

eval 'exec /usr/bin/perl  -S $0 ${1+"$@"}'
    if 0; # not running under some shell

use warnings;
use strict;
use XMLTV::ProgressBar;
use XMLTV::Memoize; XMLTV::Memoize::check_argv('get_octets');
use XMLTV::Supplement qw/GetSupplement SetSupplementRoot/;
use XMLTV::Options qw/ParseOptions/;
use XMLTV::Configure::Writer;
use XMLTV::Ask;
use File::Path;
use File::Basename;
use LWP::UserAgent;
use HTTP::Cache::Transparent;
use Encode qw/decode encode/;
use DateTime;
use DateTime::Duration;
use DateTime::TimeZone;
use HTML::Entities;
use IO::Scalar;  # used for configuration to write channels to string
use XML::LibXML; # used for lineups parsing/modification

##############################################
#################### TODO ####################
##############################################

# - wrap date/file tests in evals if there is a risk of failure
#
# - remove fixups from prog_titles_to_process that are now handled automatically
#   e.g. duplicated title and/or episode in episode field
#
# - audio tag for audio described channels
#
###############################################
################## VARIABLES ##################
###############################################

# Grabber name
my $grabber_name = 'tv_grab_uk_rt';
my $grabber_spc  = '             ';

# Grabber version
my $grabber_cvs_id = '$Id: tv_grab_uk_rt,v 1.46 2015/08/22 00:54:26 knowledgejunkie Exp $';
my $grabber_version;

if ($grabber_cvs_id =~ m!\$Id: [^,]+,v (\S+) ([0-9/: -]+)!) {
    $grabber_version = "$1, $2";
}
else {
    $grabber_version = "Unknown";
}

# Default location of Radio Times channel index file
my $rt_listings_root = 'http://xmltv.radiotimes.com/xmltv';
my $rt_channels_uri  = "$rt_listings_root/channels.dat";

# The format of the Radio Times source data (set to strict UTF-8)
my $source_encoding = "utf-8";
# Default XML output encoding to use (set to strict UTF-8). May be updated
# based on contents of configuration file
my $xml_encoding    = "utf-8";

# Required to be displayed by Radio Times
my $rt_copyright
      = "\n"
      . "     +-----------------------------------------------------+     \n"
      . "     | In accessing this XML feed, you agree that you will |     \n"
      . "     | only access its contents for your own personal and  |     \n"
      . "     |  non-commercial use and not for any commercial or   |     \n"
      . "     |  other purposes, including advertising or selling   |     \n"
      . "     |  any goods or services, including any third-party   |     \n"
      . "     |   software applications available to the general    |     \n"
      . "     |           public. <xmltv.radiotimes.com>            |     \n"
      . "     +-----------------------------------------------------+     \n"
      . "\n";

my %tv_attributes = (
    'source-info-name'    => 'Radio Times XMLTV Service',
    'source-info-url'     => 'http://www.radiotimes.com',
    'source-data-url'     => "$rt_channels_uri",
    'generator-info-name' => "XMLTV/$XMLTV::VERSION, $grabber_name $grabber_version",
    'generator-info-url'  => 'http://www.xmltv.org',
);

# Reciprocal XMLTV/RT ID hashes for the required channel_ids fields, allowing
# RT ID -> XMLTV ID and XMLTV ID -> RT ID lookups
my (%rt_to_xmltv, %xmltv_to_rt);
# Hashes for the optional channel_ids fields, keyed by XMLTV ID
my (%extra_dn, %icon_urls, %channel_offset, %broadcast_hours, %video_quality);

# Do the progress bars need a final update?
my $need_final_update;

#type id source-data-url generator-info-name generator-info-url
my %xmltv_lineup_attributes = (
    'type'                => 'DVB-T',
    'version'             => '1.00',
    'id'                  => 'freeview.co.uk',
    'source-data-url'     => 'tv_grab_uk_rt FreeView channels',
    'generator-info-name' => "XMLTV/$XMLTV::VERSION, $grabber_name $grabber_version",
    'generator-info-url'  => 'http://www.xmltv.org',
);
# Lineup writer
my $lineup_writer;

# Get default location to store cached listings data
my $default_cachedir = get_default_cachedir();

# Set up LWP::UserAgent
my $ua = LWP::UserAgent->new;
$ua->agent("xmltv/$XMLTV::VERSION");
$ua->env_proxy;

# Read all command line options
my ( $opt, $conf ) = ParseOptions( {
    grabber_name => "$grabber_name",
    version => "$grabber_cvs_id",
    description => "United Kingdom/Republic of Ireland (Radio Times)",
    capabilities => [qw/baseline manualconfig cache preferredmethod tkconfig apiconfig lineups/],
    defaults => { days => 15, offset => 0, quiet => 0, debug => 0 },
    preferredmethod => 'allatonce',
    load_old_config_sub => \&load_old_config,

    stage_sub => \&config_stage,
    listchannels_sub => \&list_channels,

    list_lineups_sub => \&list_lineups,
    get_lineup_sub => \&get_lineup,
} );

################################################################
#    At this point, the script takes over from ParseOptions    #
################################################################

###############################################
############### GRAB THE DATA #################
###############################################

die "Error: You cannot specify --quiet with --debug, exiting"
    if ($opt->{quiet} && $opt->{debug});

if (defined $conf->{lineup}) {
    say("Channel selection: Lineup\n") if (! $opt->{quiet});
}
elsif (defined $conf->{channel}) {
    say("Channel selection: Config file\n") if (! $opt->{quiet});
}
else {
    print STDERR "No configured channels in config file ($opt->{'config-file'})\n" .
                 "Please run the grabber with --configure.\n";
    exit 1;
}

# New-style config files must include a cachedir entry
if (not defined( $conf->{cachedir} )) {
    print STDERR "No cachedir defined in configfile ($opt->{'config-file'})\n" .
                 "Please run the grabber with --configure.\n";
    exit 1;
}

# Update encoding if seen in new-style config file
if (defined( $conf->{encoding} )) {
    $xml_encoding = $conf->{encoding}[0];
}

# Enable title processing? Enable it by default if not explicitly disabled
my $title_processing;
if (defined( $conf->{'title-processing'} )) {
    $title_processing = $conf->{'title-processing'}[0];
}
else {
    $title_processing = 'enabled';
}

# Enable UTF-8 fixups? Enable by default if not explicitly disabled
my $utf8_fixups_status;
if (defined( $conf->{'utf8-fixups'} )) {
    $utf8_fixups_status = $conf->{'utf8-fixups'}[0];
}
else {
    $utf8_fixups_status = 'enabled';
}

# Initialise the cache-directory
init_cachedir( $conf->{cachedir}[0] );

# Set cache options
#
# MaxAge set to 15 days as Radio Times provides 14 days of listings
# NoUpdate set to 1hr as Radio Times data updated once per day
#
HTTP::Cache::Transparent::init( {
    BasePath       => $conf->{cachedir}[0],
    MaxAge         => 15*24,
    NoUpdate       => 60*60,
    Verbose        => $opt->{debug},
    ApproveContent => \&check_content_length,
    }
);

# A reusable 1 day duration object
my $day_dur = DateTime::Duration->new( days => 1 );

# Variables for programme title manipulation
my $have_title_data = 0;
my %non_title_info;           # key = title, value = title
my %mixed_title_subtitle;     # key = title, value = title
my @mixed_subtitle_title;     # array
my %reversed_title_subtitle;  # key = title, value = title
my %replacement_titles;       # key = old title, value = replacement title
my %replacement_episodes;     # key = title, value = hash (where key = old ep, value = new ep)
my %replacement_cats;         # key = title, value = category
my %replacement_title_eps;    # key = 'old_title . '|' . old_ep', value = (new_title, new_ep)
my %replacement_title_desc;   # key = 'old_title . '|' . old_ep' . '|' . 'old_desc', value = (new_title, new_ep)
my %flagged_title_eps;        # key = old_title from title fixup routine 8
my %dotdotdot_titles;         # key = replacement title ending with '...' seen in title fixup routine 8
my %replacement_ep_from_desc; # key = title, value = hash (where key = desc, value = new ep)
my %demoted_title;            # key = title, value = title
my %replacement_cats_film;    # key = title, value = category
my %subtitle_remove_text;     # key = title, value = hash (where key/value = text to remove)
my %uc_prog_titles;           # key = title, value = title
my %new_title_in_subtitle_fixed; # key = 'title . '|' . episode', value = hashref (keys are title and episode)
my %title_in_subtitle_fixed;  # key = 'title . '|' . episode', value = hashref (keys are title and episode)
my %title_ep_in_subtitle_fixed; # key = 'title . '|' . episode', value = hashref (keys are title and episode)
my %title_in_subtitle_notfixed; # key = 'title . '|' . episode', value = hashref (keys are title and episode)
my %colon_in_subtitle;        # key = 'title . '|' . episode', value = hashref (keys are title and episode)

# Create global hashes to store programme/film titles for all programmes on all
# channels, as we will process these lists after grabbing to determine any
# titles which may need to be 'fixed up'
my %prog_titles;
my %film_titles;
# hash to store case/punctuation-insensitive variants of titles
my %case_insens_titles;

# Hash to store bad character strings and their replacments that are used when
# processing the source data to remove mis-encoded UTF-8 characters
my %utf8_fixups;

# Create hashes to store names/urls of channels with occurences of mis-encoded
# UTF-8 data after our replacement routines have run
my %hasC27F9Fchars;
my %hadEFBFBD;
my %hadC3AFC2BFC2BD;

# Create hashes to store uncategorised programmes and available categories
# to potentially use for such programmes
my %uncategorised_progs;
my %reality_progs;
my %categories;
my %cats_per_prog;
my %short_films;

# Create hashes to store episode details that may still contain series, episode
# or part numbering after processing to handle these has been carried out
my %possible_series_nums;
my %possible_episode_nums;
my %possible_part_nums;

# Hash to map cast roles seen in the source data to valid XMLTV credits roles
my %credits_role_map;
my @valid_roles = ('director', 'actor', 'writer', 'adapter', 'producer', 'composer', 'editor', 'presenter', 'commentator', 'guest');

# Track roles seen in the source data
my %seen_roles;

# Hash to store titles containing text that should likely be removed
my %title_text_to_remove;

# Hash to store details of empty source listings
my %empty_listings;

# Track problems during listings retrieval. Currently we exit(1) only if
# listings data is missing for any requested channels
my $chan_warnings = 0;
my $prog_warnings = 0;

# Output XMLTV library and grabber versions
if (! $opt->{quiet}) {
    say("Program/library version information:\n");
    say("XMLTV library version: $XMLTV::VERSION");
    say("$grabber_name version: $grabber_version");
    say("  libwww-perl version: $LWP::VERSION\n");
}

# Determine the modification time of the source data on the RT servers
my $rt_mod_time = get_mod_time($rt_channels_uri);
if ($rt_mod_time) {
    say("\nSource data last updated on: " . $rt_mod_time . "\n") if (! $opt->{quiet});
    $tv_attributes{'date'} = $rt_mod_time;
}

# Retrieve list of all channels currently available
my $available_channels = load_available_channels($conf, $opt);
# Now ensure configured channels are still available to download
my $wanted_chs = get_wanted_channels_aref($conf, $opt, $available_channels);

# Configure output and write XMLTV data - header, channels, listings, and footer
my $writer;
setup_xmltv_writer($conf, $opt);
write_xmltv_header();
write_channel_list($wanted_chs);
write_listings_data($conf, $opt, $wanted_chs);
write_xmltv_footer();

# Print debug info for titles, categories, bad utf-8 chars
if ($opt->{debug}) {
    print_titles_with_colons();
    print_titles_with_hyphens();
    print_new_titles();
    print_uc_titles_post();
    print_title_variants();
    print_titles_inc_years();
    print_titles_inc_bbfc_certs();
    print_flagged_title_eps();
    print_dotdotdot_titles();
    print_new_title_in_subtitle();
    print_title_in_subtitle();
    print_categories();
    print_uncategorised_progs();
    print_reality_progs();
    print_cats_per_prog();
    print_short_films();
    print_possible_prog_numbering();
    print_misencoded_utf8_data();
    print_unhandled_credits_roles();
}

# Give a useful exit status if data for some channels was not downloaded
if (! $opt->{quiet}) {
    print_empty_listings();
    if ($chan_warnings) {
        say("\nFinished, but listings for some configured channels are missing. Check debug log.\n");
        exit(1);
    }
    elsif ($prog_warnings) {
        say("\nFinished, but listings for some programmes could not be processed. Check debug log.\n");
        exit(0);
    }
    else {
        say("\nFinished!\n");
        exit(0);
    }
}

###############################################
################ SUBROUTINES ##################
###############################################

sub get_default_cachedir {
    my $winhome = undef;
    if (defined $ENV{HOMEDRIVE} && defined $ENV{HOMEPATH}) {
        $winhome = $ENV{HOMEDRIVE} . $ENV{HOMEPATH};
    }

    my $home = $ENV{HOME} || $winhome || ".";
    my $dir = "$home/.xmltv/cache";
    t("Using '$dir' as cache-directory for XMLTV listings");
    return $dir;
}

sub load_old_config {
    my ( $config_file ) = @_;

    if (! $opt->{quiet}) {
        say("Using old-style config file");
    }

    my @config_entries = XMLTV::Config_file::read_lines( $config_file );

    my $conf = {};
    # Use default cachedir as there was no support for choosing an alternative
    # cache directory before ParseOptions support was added to the grabber.
    $conf->{cachedir}[0] = $default_cachedir;
    $conf->{channel} = [];

    CONFIG_ENTRY:
    foreach my $config_entry (@config_entries)
    {
        next CONFIG_ENTRY if (! defined $config_entry);
        next CONFIG_ENTRY if ($config_entry =~ m/^#/ || $config_entry =~ m/^\s*$/);
        if ($config_entry !~ m/^channel\s+(\S+)$/) {
            if (! $opt->{quiet}) {
                say("Bad line '$config_entry' in config file, skipping");
            }
            next CONFIG_ENTRY;
        }

        my( $command, $param ) = split( /\s+/, $config_entry, 2 );
        $param =~ tr/\n\r//d;
        $param =~ s/\s+$//;

        # We only support channel entries in the old-style config
        if ($command =~ m/^\s*channel\s*$/) {
            push @{$conf->{channel}}, $param;
        }
        else {
            die "Unknown command '$command' in config file $config_file"
        }
    }

    return $conf;
}

sub init_cachedir {
    my $path = shift @_;
    if (! -d $path) {
        if (mkpath($path)) {
            t("Created cache-directory '$path'");
        }
        else {
            die "Error: Failed to create cache-directory $path: $@, exiting";
        }
    }
}

# Check whether data files on the RT website are empty but still online, or
# contain HTML/XML from the Radio Times' error page.
#
# These files will have a good HTTP response header as they exist, but they
# contain no data. Caching via HCT without checking for a non-zero content_size
# beforehand will therefore overwrite good data with bad. Any file having a
# content_length of 0 or seen to contain DOCTYPE info will not be cached and the
# existing cached copy of the file will be used instead.
#
# Support for this functionality requires using at least the 1.0 version of
# HTTP::Cache::Transparent, which can be obtained from CPAN.
#
sub check_content_length {
    my $rt_file = shift @_;
    if ($rt_file->is_success) {
        # reject an empty (but available) file
        if ($rt_file->content_length == 0) {
            return 0;
        }
        # an empty source file containing only the RT disclaimer has a length
        # of approx 300 bytes
        elsif ($rt_file->content_length < 400) {
            return 0;
        }
        # reject a likely HTML error page
        elsif ($rt_file->content =~ m/DOCTYPE/) {
            return 0;
        }
        # cache a likely good file
        else {
            return 1;
        }
    }
    # reject file if retrieval failed
    else {
        return 0;
    }
}

# Get the last-modified time of a successful HTTP Response object. Return
# undef on error
sub get_mod_time {
    my $resp = $ua->get(shift @_);
    if ($resp->is_error) {
        return undef;
    }
    else {
        return $resp->header('Last-Modified');
    }
}

# Determine all currently available channels by reading the current Radio
# Times list of channels, and adding additional information from the
# grabber's channel_ids file. The content of both of these files is
# required in order to proceed with listings retrieval.
#
sub load_available_channels {
    my ( $conf, $opt ) = @_;

    # Update encoding if seen in new-style config file
    if (defined( $conf->{encoding} )) {
        $xml_encoding = $conf->{encoding}[0];
    }

    # First we read in the XMLTV channel_ids file to provide additional
    # information (icon, display name) about available channels, and also
    # provide the information necessary for timeshifted and part-time channel
    # support.
    #
    # We use the hashes %rt_to_xmltv and %xmltv_to_rt to lookup the Radio
    # Times and XMLTV channel IDs. These will deal sensibly with a new RT
    # channel that isn't yet mentioned in the channel_ids file.

    # Provide statistics for the number of usable, unusable, timeshifted,
    # part-time, and part-time timeshifted channels listed in channel_ids.
    my $num_good_channels = 0;
    my $num_bad_channels = 0;
    my $num_ts_channels = 0;
    my $num_pt_channels = 0;
    my $num_pt_ts_channels = 0;

    # Retrieve grabber's channel_ids file via XMLTV::Supplement
    my $xmltv_channel_ids = GetSupplement("$grabber_name", 'channel_ids');

    die "Error: XMLTV channel_ids data is missing, exiting"
        if (! defined $xmltv_channel_ids || $xmltv_channel_ids eq '');

    my @lines = split /[\n\r]+/, $xmltv_channel_ids;

    t("\nExtended XMLTV channel information:\n");

    XMLTV_CHANID_ENTRY:
    foreach my $line (@lines) {
        # Skip blank lines. Comments are allowed if they are at the start
        # of the line.
        next XMLTV_CHANID_ENTRY if ($line =~ m/^#/ || $line =~ m/^$/);
        my @fields = split /\|/, $line;
        # We need at least 2 fields (xmltv_id,rt_id) to run the grabber.
        # No check on maximum number of fields to support future updates
        # to channel_ids now we are using XMLTV::Supplement.
        if (scalar @fields < 2) {
            t("Wrong number of fields in XMLTV channel_ids entry:\n"
                    . "\t" . $line);
            next XMLTV_CHANID_ENTRY;
        }

        # The channel_ids fields are:
        # 1) XMLTV ID
        # 2) RT ID
        # 3) Channel name
        # 4) Channel icon URL
        # 5) Timeshift offset
        # 6) Broadcast hours
        # 7) Video quality
        #
        # The RT channels.dat provides a channel name, but it may be out of
        # date - here we provide an alternative or updated name if the
        # channel name has changed
        my ($xmltv_id, $rt_id,          $extra_dn,
            $icon_url, $channel_offset, $broadcast_hours,
            $video_quality) = @fields;

        # Flag timeshifted and part-time channels for stats
        my ($is_timeshift, $is_parttime);

        # Check for required XMLTV ID and RT ID fields, skip if missing
        if (! defined $xmltv_id  || $xmltv_id eq '') {
            t("Undefined XMLTV ID seen in channel_ids, skipping");
            next XMLTV_CHANID_ENTRY;
        }
        if ($xmltv_id !~ m/\w+\.\w+.*/) {
            t("Invalid XMLTV ID seen in channel_ids, skipping");
            next XMLTV_CHANID_ENTRY;
        }
        if (! defined $rt_id || $rt_id eq '') {
            t("Undefined RT ID seen in channel_ids, skipping");
            next XMLTV_CHANID_ENTRY;
        }
        if ($rt_id !~ m/^\d+$/) {
            t("Invalid RT ID seen in channel_ids, skipping");
            next XMLTV_CHANID_ENTRY;
        }

        # Check for duplicate RT IDs having same associated XMLTV ID. As part of
        # timeshifted/part-time channel support, we associate the same RT ID
        # with different XMLTV IDs
        foreach my $id (@{$rt_to_xmltv{$rt_id}}) {
            if (defined $id && $id eq $xmltv_id) {
                t("Radio Times ID '$rt_id' already seen in XMLTV "
                  . "channel_ids file, skipping");
                next XMLTV_CHANID_ENTRY;
            }
        }

        # Check whether current XMLTV ID has already been seen
        if (defined $xmltv_to_rt{$xmltv_id}) {
            t("XMLTV ID '$xmltv_id' already seen in XMLTV channel_ids file, skipping");
            next XMLTV_CHANID_ENTRY;
        }

        # Store the XMLTV channel description, report if it is missing
        if (defined $extra_dn) {
            if ($extra_dn eq '' || $extra_dn !~ m/\w+/) {
                $extra_dn = undef;
                if (defined $extra_dn && $extra_dn !~ m/\(Do\ Not\ Use\)/) {
                    t("No XMLTV channel name associated with '$xmltv_id'");
                }
            }
            else {
                $extra_dn{$xmltv_id} = $extra_dn;
            }
        }

        # Check for channel icon
        if (defined $icon_url) {
            if ($icon_url eq '' || $icon_url !~ m/^http/) {
                $icon_url = undef;
                if (defined $extra_dn && $extra_dn !~ m/\(Do\ Not\ Use\)/) {
                    t("No channel icon associated with '$xmltv_id'");
                }
            }
            else {
                $icon_urls{$xmltv_id} = $icon_url;
            }
        }

        # Check for valid timeshift offset
        if (defined $channel_offset) {
            if ($channel_offset eq '' || $channel_offset !~ m/^(\+|\-)/) {
                $channel_offset = undef;
            }
            else {
                $channel_offset{$xmltv_id} = $channel_offset;
                if (defined $extra_dn && $extra_dn !~ m/\(Do\ Not\ Use\)/) {
                    t("Channel '$xmltv_id' has timeshift of '$channel_offset'");
                }
                $is_timeshift = 1;
            }
        }

        # Check for correct broadcast hours format (HHMM-HHMM)
        if (defined $broadcast_hours) {
            if ($broadcast_hours eq '' || $broadcast_hours !~ m/\d{4}-\d{4}/) {
                $broadcast_hours = undef;
            }
            else {
                $broadcast_hours{$xmltv_id} = $broadcast_hours;
                if (defined $extra_dn && $extra_dn !~ m/\(Do\ Not\ Use\)/) {
                    t("Channel '$xmltv_id' is on air '$broadcast_hours'");
                }
                $is_parttime = 1;
            }
        }

        # Check for presence of video quality information (SDTV or HDTV)
        if (defined $video_quality) {
            if ($video_quality eq '' || $video_quality !~ m/SDTV|HDTV/) {
                $video_quality = undef;
            }
            else {
                $video_quality{$xmltv_id} = $video_quality;
            }
        }

        # Handle multiple XMLTV IDs associated with a single RT ID. Required
        # after introduction of timeshifted and part-time channel support,
        # which map multiple XMLTV IDs to a single RT ID.
        push @{$rt_to_xmltv{$rt_id}}, $xmltv_id;
        $xmltv_to_rt{$xmltv_id} = $rt_id;

        # Update the counts of part-time and timeshifted channels
        if ($is_timeshift && $is_parttime) {
            $num_pt_ts_channels++;
        }
        elsif ($is_timeshift) {
            $num_ts_channels++;
        }
        elsif ($is_parttime) {
            $num_pt_channels++;
        }

        # Finally, update count of good/bad channels
        if ($extra_dn =~ m/\(Do\ Not\ Use\)/) {
            $num_bad_channels++;
        }
        else {
            $num_good_channels++;
        }
    }
    t("\n");
    # channel_ids processing finished

    die "Error: No usable XMLTV channel definitions seen in channel_ids, exiting"
        if (! defined $num_good_channels || $num_good_channels < 1);

    # Read in the Radio Times channels.dat file, which is supplied in UTF-8
    # format. We process the list of available channels and check for
    # presence of duplicate IDs or names.

    # TESTING: config-file key-value to override default location of channels.dat
    if (defined( $conf->{'rt_listings_root'} )) {
        t("\nUsing custom Radio Times listings root of '" . $conf->{'rt_listings_root'}[0] . "'\n");
        $rt_channels_uri = $conf->{'rt_listings_root'}[0] . "/channels.dat";
    }

    #
    # Grab the octets
    t("Retrieving channel list from Radio Times website");
    my $rt_channels_dat = get_octets( $rt_channels_uri );

    die "Error: Radio Times channels.dat data is missing, exiting\n"
        . "Please check $rt_channels_uri"
        if (! defined $rt_channels_dat || $rt_channels_dat eq '');

    # Decode source UTF-8 octets, process for HTML entities, and encode
    # into configured output encoding
    t("\nDecoding channel data from $source_encoding octets into Perl's internal format");
    $rt_channels_dat = decode($source_encoding, $rt_channels_dat);
    t("Processing for HTML entities seen in the channel data");
    decode_entities($rt_channels_dat);
    t("Encoding channel data from Perl's internal format into $xml_encoding octets\n");
    $rt_channels_dat = encode($xml_encoding, $rt_channels_dat);

    my @rt_channels = split /\n/, $rt_channels_dat;
    my $num_rt_channels = scalar @rt_channels;

    $need_final_update = 0;
    my $chans_bar;
    if (! $opt->{quiet} && ! $opt->{debug}) {
        $chans_bar = new XMLTV::ProgressBar({name   => 'Retrieving channels',
                                             count  => $num_rt_channels,
                                             ETA    => 'linear', });
    }

    # Hash to store details for <channel> elements
    my %channels;
    my (%seen_rt_id, %seen_name);
    my $num_good_rt_channels = 0;
    my $to_say = "";

    RT_CHANDAT_ENTRY:
    foreach my $rt_channel (@rt_channels) {
        chomp $rt_channel;
        # ignore empty line and disclaimer at start of file
        if ($rt_channel =~ m/^\s*$/ || $rt_channel =~ /^In accessing this XML feed/) {
            next RT_CHANDAT_ENTRY;
        }

        if ($rt_channel !~ m/^(\d+)\|(.+)/) {
            t("Bad entry '$rt_channel' seen in RT channels.dat, skipping");
            next RT_CHANDAT_ENTRY;
        }

        my ($rt_id, $rt_name) = ($1, $2);
        if ($seen_rt_id{$rt_id}++) {
            t("Duplicate channnel ID '$rt_id' seen in RT channels.dat, skipping");
            next RT_CHANDAT_ENTRY;
        }

        if ($seen_name{$rt_name}++) {
            t("Duplicate channel name '$rt_name' seen in RT channels.dat");
        }

        # Check whether there is at least one XMLTV ID associated with the RT ID
        #
        # If the current RT channel has a known XMLTV ID, check it against known bad
        # channels and skip it if required. If the channel does not have an
        # XMLTV ID, create one and continue.
        #
        my $xmltv_id = $rt_to_xmltv{$rt_id}[0];
        if (defined $xmltv_id) {
            # Skip any RT entries which have been flagged as bad in channel_ids file
            if ($extra_dn{ $rt_to_xmltv{$rt_id}[0] } =~ m/\(Do\ Not\ Use\)/) {
                t("Channel '$rt_name' ($rt_id) flagged as bad, skipping");
                $need_final_update = 1;
                next RT_CHANDAT_ENTRY;
            }
        }
        else {
            # Handle new channels available on RT site unknown to channel_ids file
            $to_say .= "Unknown channel '$rt_name'. Will configure as 'C$rt_id.radiotimes.com'\n";
            push @{$rt_to_xmltv{$rt_id}}, "C$rt_id.radiotimes.com";
        }

        foreach my $id (@{$rt_to_xmltv{$rt_id}}) {
            # Use a name for the channel if defined in our channel_ids file,
            # otherwise use the name supplied by the Radio Times.
            my @names = ();
            if (defined $extra_dn{$id}) {
                @names = ([ $extra_dn{$id} ]);
            }
            else {
                @names = ([ $rt_name ]);
            }

            # Add a URL for a channel icon if available.
            my @icon;
            my $icon_url = $icon_urls{$id};
            if ($icon_url) {
                @icon = { 'src' => $icon_url };
            }

            # Add the channel's details to the %channels hash, adding icon
            # details if available.
            if (@icon) {
                $channels{$id} = {
                    id             => $id,
                    rt_id          => $rt_id,
                    'display-name' => \@names,
                    'icon'         => \@icon,
                };
            }
            else {
                $channels{$id} = {
                    id             => $id,
                    rt_id          => $rt_id,
                    'display-name' => \@names,
                };
            }
        }
        # We have a usable channel definition at this point
        $num_good_rt_channels++;

        # Update the progres bar by one increment
        if (defined $chans_bar) {
            $chans_bar->update();
        }
    }

    die "Error: No usable Radio Times channel definitions available, exiting"
        if ($num_good_rt_channels < 1);

    if (defined $chans_bar) {
        # Only update the progress bar to 100% if we need to
        if ($need_final_update) {
            $chans_bar->update($num_rt_channels);
        }
        $chans_bar->finish();
        if (! $opt->{quiet}) {
            say( "\n" );
        }
    }

    if (! $opt->{quiet} && $to_say) {
        say( $to_say );
        say("\n  Please notify the maintainer to get the new channels added");
    }

    # Output statistics on the number of channels currently available
    if (! $opt->{quiet}) {
        say("\nThe Radio Times has usable data available for $num_good_rt_channels channels which we\n"
            . "can use to generate TV listings for regular and some timeshifted\n"
            . "channels. The tv_grab_uk_rt software also has support for an additional\n"
            . "$num_ts_channels timeshifted, $num_pt_channels part-time, and $num_pt_ts_channels part-time timeshifted channels\n"
            . "based on the Radio Times data.\n\n"
            . "In total, tv_grab_uk_rt currently supports $num_good_channels channels.\n");
    }

    # Report any channels listed in channel_ids not seen on the Radio Times
    # site
    if (! $opt->{quiet}) {
        XMLTV_ID:
        foreach my $xmltv_id (keys %xmltv_to_rt) {
            # Ignore channels flagged as bad in channel_ids
            next XMLTV_ID if ($extra_dn{$xmltv_id} =~ m/.*Do\ Not\ Use.*/);
            if (! exists $channels{$xmltv_id}) {
                say("XMLTV channel '$xmltv_id' ($xmltv_to_rt{$xmltv_id}) "
                   . "not seen on RT site\n");
            }
        }
    }

    return \%channels;
}

# Determine options for, and create XMLTV::Writer object
sub setup_xmltv_writer {
    my ( $conf, $opt ) = @_;

    # Update encoding if seen in new-style config file
    if (defined( $conf->{encoding} )) {
        $xml_encoding = $conf->{encoding}[0];
    }

    # output options
    my %g_args = ();
    if (defined $opt->{output}) {
        t("\nOpening XML output file '$opt->{output}'\n");
        my $fh = new IO::File ">$opt->{output}";
        die "Error: Cannot write to '$opt->{output}', exiting" if (! $fh);
        %g_args = (OUTPUT => $fh);
    }

    # Determine how many days of listings are required and range-check, applying
    # default values if impossible. If --days or --offset is specified we must
    # ensure that values for days, offset and cutoff are passed to XMLTV::Writer
    my %d_args = ();
    if (defined $opt->{days} || defined $opt->{offset}) {
        if (defined $opt->{days}) {
            if ($opt->{days} < 1 || $opt->{days} > 15) {
                if (! $opt->{quiet}) {
                    say("Specified --days option is not possible (1-15). "
                      . "Retrieving all available listings.");
                }
                $opt->{days} = 15
            }
        }
        else {
            $opt->{days} = 15;
        }

        if (defined $opt->{offset}) {
            if ($opt->{offset} < 0 || $opt->{offset} > 14) {
                if (! $opt->{quiet}) {
                    say("Specified --offset option is not possible (0-14). "
                      . "Retrieving all available listings.");
                }
                $opt->{offset} = 0;
            }
        }
        else {
            $opt->{offset} = 0;
        }
        $d_args{days} = $opt->{days};
        $d_args{offset} = $opt->{offset};
        # We currently don't provide a --cutoff option
        $d_args{cutoff} = "000000";
    }

    t("Setting up XMLTV::Writer using \"" . $xml_encoding . "\" for output");
    $writer = new XMLTV::Writer(%g_args, %d_args, encoding => $xml_encoding);
}

sub write_xmltv_header {
    t("Writing XMLTV header");
    $writer->start(\%tv_attributes);
}

sub write_channel_list {
    my $wanted_chs = shift;

    t("Started writing <channel> elements...");
    foreach my $chan_href (@{$wanted_chs}) {
        my %h = %$chan_href;
        delete $h{'rt_id'};
        $writer->write_channel(\%h);
    }
    t("Finished writing <channel> elements");
}

# Download listings data for configured channels that are available
sub write_listings_data {
    my ( $conf, $opt, $wanted_chs ) = @_;

    # Update encoding if seen in new-style config file
    if (defined( $conf->{encoding} )) {
        $xml_encoding = $conf->{encoding}[0];
    }

    my $num_req_chans = scalar @{$wanted_chs};

    if (! $opt->{quiet}) {
        display_copyright();
        say("Will download listings for $num_req_chans configured channels\n");
    }

    my $listings_bar;
    if (! $opt->{quiet} && ! $opt->{debug}) {
        $listings_bar = new XMLTV::ProgressBar({name  => 'Retrieving listings',
                                                count => $num_req_chans,
                                                ETA   => 'linear', });
    }

    # Was title processing enabled in config file?
    if ($title_processing eq 'enabled') {
        t("Extra title processing is enabled\n");
        load_prog_titles_to_process();
    }
    else {
        t("Extra title processing is disabled\n");
    }

    # Were UTF-8 fixups enabled in config file?
    if ($utf8_fixups_status eq 'enabled') {
        t("UTF-8 fixups are enabled\n");
        load_utf8_fixups();
    }
    else {
        t("UTF-8 fixups are disabled\n");
    }

    # Hash to hold warnings of incorrect number of fields. The warning
    # is given once per listings file if noticed more than once
    my %warned_wrong_num_fields;

    # Reset check for final progress bar update
    $need_final_update = 0;

    # TESTING: config-file key-value to override default location of channel listings
    if (defined( $conf->{'rt_listings_root'} )) {
        t("\nUsing custom Radio Times listings root of '" . $conf->{'rt_listings_root'}[0] . "'\n");
        $rt_listings_root = $conf->{'rt_listings_root'}[0];
    }

    # Process all of the channels we want listings for
    WANTED_CH:
    foreach my $chan_href (@{$wanted_chs}) {
        my $xmltv_id = $chan_href->{'id'};
        my $rt_id    = $chan_href->{'rt_id'};
        my $rt_name  = $chan_href->{'display-name'}[0][0];
        if (! defined $rt_id) {
            t("No Radio Times ID for channel '$rt_name', skipping");
            next WANTED_CH;
        }

        # Create the channel's URL based on ID
        my $rt_listings_uri = "$rt_listings_root/$rt_id.dat";
        # Include the URL in any warn/die messages
        local $SIG{__DIE__} = sub { die "$rt_listings_uri: $_[0]" };
        local $SIG{__WARN__} = sub { warn "$rt_listings_uri: $_[0]" };

        # Read in the listings data for the channel as UTF-8 octets. We will
        # process the raw octets before decoding them to Perl's internal
        # format below.
        t("\nRetrieving listings for '$rt_name'");
        my $page = get_octets( $rt_listings_uri );

        if (! defined $page || $page eq '') {
            if (! $opt->{quiet}) {
                say("No listings data available for '$rt_name' ($xmltv_id), skipping");
            }
            $chan_warnings++;
            next WANTED_CH;
        }
        if (! $opt->{quiet}) {
            say("Processing listings for '$rt_name' ($xmltv_id)");
        }
        t("");

        my $ts_dt;
        if (defined $channel_offset{$xmltv_id}) {
            t("  Detected a channel offset of '$channel_offset{$xmltv_id}' for '$rt_name'");

            # Setup a reusable Duration object for this channel's timeshift
            $channel_offset{$xmltv_id} =~ m/[+](\d+)hours?/;
            $ts_dt = DateTime::Duration->new( hours => $1 );
        }

        # detect/correct UTF-8 errors in source data
        $page = process_utf8_fixups($page, $rt_name, $rt_listings_uri);

        # Decode source UTF-8 octets and process for HTML entities
        t("\nDecoding listings data from $source_encoding octets into Perl's internal format");
        $page = decode($source_encoding, $page);
        t("Processing for HTML entities seen in the listings data");
        decode_entities($page);

        ##### From this point, $page is in a Perl string #####

        # Start to process individual programme entries found in listings
        t("Started processing programmes for channel '$rt_name'\n");

        # list to store programme elements for writing when each channel is parsed
        my @programmes = ();

        # Track number of programmes per channel
        my $num_titles = 0;

        PROGRAMME:
        foreach my $prog (split /\n/, $page) {

            # ignore empty line and disclaimer at start of each file
            if ($prog =~ m/^\s*$/ || $prog =~ m/^In accessing this XML feed/) {
                next PROGRAMME;
            }

            my @fields = split /\~/, $prog;
            if (scalar @fields < 23) {
                if ($warned_wrong_num_fields{$xmltv_id}++) {
                    t("  Too few data fields (need at least 23) in line:\n  $prog\n");
                }
                $prog_warnings++;
                t("\n  ----\n");
                next PROGRAMME;
            }
            # Remove any spaces at start/end of fields
            foreach my $field (@fields) {
                $field =~ s/^\s+//;
                $field =~ s/\s+$//;
                undef $field if !length $field;
            }

            # Description of Radio Times data fields (23 in total):
            #
            #  1            title - the programme title (text)
            #  2        sub_title - used to carry series/episode numbering (text)
            #  3          episode - used to carry the name/subtitle of an episode of the
            #                       programme (text)
            #  4             year - the year of production (text)
            #  5         director - the programme's director(s) (text)
            #  6             cast - the programme's cast (may include character details) (text)
            #  7         premiere - whether this is a film's first showing (boolean)
            #  8             film - whether the programme is a film (boolean)
            #  9           repeat - whether the programme has been shown before (boolean)
            # 10        subtitles - whether subtitles are available (boolean)
            # 11       widescreen - whether the broadcast is 16:9 widescreen (boolean)
            # 12       new_series - whether the programme is the first episode in a
            #                       series new (boolean)
            # 13      deaf_signed - whether in-vision signing is available (boolean)
            # 14  blank_and_white - whether the broadcast is not in colour (boolean)
            # 15      star_rating - a star rating between 0 and 5 for films (text)
            # 16      certificate - the BBFC certificate for the programme (text)
            # 17            genre - the genre of the programme (text)
            # 18             desc - a description of the programme. Can be a specific review
            #                       by a Radio Times reviewer (text)
            # 19           choice - whether the programme is recommended by the
            #                       Radio Times (boolean)
            # 20             date - the transmission date (text)
            # 21            start - the transmission start time for the programme (text)
            # 22             stop - the transmissions stop time for the programme (text)
            # 23    duration_mins - the duration of the programme in minutes (text)
            #

            # Hash to store all programme-specific variables. Initially store
            # the channel's XMLTV ID and name.
            my %prog = (channel => $xmltv_id, '_rt_name' => $rt_name);

            # Store fields against temp keys. We will assign values to the XMLTV
            # specific keys during processing. Key names starting with "_" are
            # ignored by XMLTV::Writer.
            ( $prog{'_title'},       $prog{'_sub_title'},       $prog{'_episode'},
              $prog{'_year'},        $prog{'_director'},        $prog{'_cast'},
              $prog{'_premiere'},    $prog{'_film'},            $prog{'_repeat'},
              $prog{'_subtitles'},   $prog{'_widescreen'},      $prog{'_new_series'},
              $prog{'_deaf_signed'}, $prog{'_black_and_white'}, $prog{'_star_rating'},
              $prog{'_certificate'}, $prog{'_genre'},           $prog{'_desc'},
              $prog{'_choice'},      $prog{'_date'},            $prog{'_start'},
              $prog{'_stop'},        $prog{'_duration_mins'},
            ) = @fields;

            # Validate key fields (title/date/time) before processing
            if (! validate_key_fields(\%prog)) {
                $prog_warnings++;
                t("\n  ----\n");
                next PROGRAMME;
            }
            # Check true/false fields for valid data
            foreach my $field ('_premiere',   '_film',            '_repeat',
                               '_subtitles',  '_widescreen',      '_new_series',
                               '_deaf_signed','_black_and_white', '_choice',   ) {
                if (! validate_boolean_field(\%prog, $field) ) {
                    $prog_warnings++;
                    t("\n  ----\n");
                    next PROGRAMME;
                }
            }

            t("  Processing programme title '" . $prog{'_title'} . "'");

            t("    Is flagged as a film.") if ($prog{'_film'});
            t("    Has no genre provided.") if (! defined $prog{'_genre'});

            # Check for DST-related information in title
            check_explicit_tz_in_title(\%prog);
            # Remove any last-minute scheduling messages from desc
            remove_updated_listing_desc(\%prog);
            # Check for episode numbering in sub_title field
            check_numbering_in_subtitle(\%prog);

            # At this point, $prog{'_sub_title'} should be undefined with all
            # text either parsed out or moved into $prog{'_episode'}

            # Check for null or invalid release year
            validate_year_field(\%prog);
            # Remove production year information from $episode for films
            remove_year_from_episode(\%prog);
            # Tidy $title text before title processing
            tidy_title_text(\%prog);
            # Store uppercase titles for late analysis
            check_uppercase_titles(\%prog);

            # Debug output before any title processing takes place
            my $ep_in = "<UNDEF>";
            if (defined $prog{'_episode'}) {
                $prog{'_episode'} =~ s/\s+/ /g;  # tidy whitespace
                $ep_in = $prog{'_episode'};
            }
            t("    Pre-processing title/episode:  "
                    . "[ \"" . $prog{'_title'} . "\" ] | [ \"" . $ep_in . "\" ]");

            # Remove "New $title" if seen in episode field
            remove_duplicated_new_title_in_ep(\%prog);
            # Remove a duplicated programme title/ep if seen in episode field
            remove_duplicated_title_and_ep_in_ep(\%prog);
            # Remove a duplicated programme title if seen in episode field
            remove_duplicated_title_in_ep(\%prog);
            # Title and episode processing. We process titles if the user has
            # not explicitly disabled title processing during configuration
            # and we have supplement data to process programmes against.
            process_title_fixups(\%prog);
            # Look for series/episode/part numbering in programme title/subtitle
            check_potential_numbering_in_text(\%prog);
            # Tidy $episode text after title processing
            tidy_episode_text(\%prog);

            # Output updated title/episode information after processing
            my $ep_out = "<UNDEF>";
            if (defined $prog{'_episode'}) {
                $prog{'_episode'} =~ s/\s+/ /g;  # tidy whitespace
                $ep_out = $prog{'_episode'};
            }
            t("    Post-processing title/episode: "
                    . "[ \"" . $prog{'_title'} . "\" ] | [ \"" . $ep_out . "\" ]");

            # Store title debug info for later analysis
            store_title_debug_info(\%prog);
            # Remove film title duplicated in $episode field
            check_duplicated_film_title(\%prog);
            # Check for film without a valid release year
            check_missing_film_year(\%prog);
            # Tidy $desc text after title processing
            tidy_desc_text(\%prog);
            # Check description for possible premiere/repeat hints
            update_premiere_repeat_flags_from_desc(\%prog);
            # Look for series/episode numbering in programme description
            extract_numbering_from_desc(\%prog);
            # Create episode numbering
            my $ep_num = generate_episode_numbering(\%prog);

            # Create credits structure
            generate_cast_list(\%prog);

            # Store genre debug info for later analysis
            store_genre_debug_info(\%prog);
            # Create start/stop timings
            if (! generate_start_stop_times(\%prog, $ts_dt)) {
                $prog_warnings++;
                t("\n  ----\n");
                next PROGRAMME;
            }

            # After processing is finished, create the %prog keys
            # that will be written out by XMLTV::Writer, encoding the
            # Perl string data back to the chosen output format

            $prog{title} = [ [ encode($xml_encoding, $prog{'_title'}) ] ];

            if (defined $prog{'_episode'} && $prog{'_episode'} !~ m/^\s*$/) {
                $prog{'sub-title'} = [ [ encode($xml_encoding, $prog{'_episode'}) ] ];
            }
            if (defined $prog{'_desc'} && $prog{'_desc'} !~ m/^\s*$/) {
                $prog{desc} = [ [ encode($xml_encoding, $prog{'_desc'}), 'en' ] ];
            }
            if (defined $ep_num) {
                $prog{'episode-num'} = [ [ $ep_num, "xmltv_ns" ] ];
            }
            if (defined $prog{'_director'} && $prog{'_director'} !~ m/^\s*$/) {
                $prog{credits}{director} = [ encode($xml_encoding, $prog{'_director'}) ];
            }
            if (defined $prog{'_year'}) {
                $prog{date} = $prog{'_year'};
            }
            if (defined $prog{'_genre'} && ! $prog{'_film'}) {
                push @{$prog{category}}, [ encode($xml_encoding, $prog{'_genre'}), 'en' ];
            }
            elsif ($prog{'_film'}) {
                push @{$prog{category}}, [ 'Film', 'en' ];
            }
            if ($prog{'_widescreen'}) {
                $prog{video}{aspect} = '16:9';
            }
            if ($prog{'_black_and_white'}) {
                $prog{video}{colour} = 0;
            }
            if (defined $video_quality{$xmltv_id}) {
                if ($video_quality{$xmltv_id} =~ m/HDTV/) {
                    $prog{video}{quality} = 'HDTV';
                    $prog{video}{aspect} = '16:9';
                }
                elsif ($video_quality{$xmltv_id} =~ m/SDTV/) {
                    $prog{video}{quality} = 'SDTV';
                }
            }
            if ($prog{'_premiere'}) {
                if (defined $channel_offset{$xmltv_id}) {
                    $prog{'_repeat'} = 1;
                    t("    Ignoring premiere flag on timeshifted channel");
                }
                else {
                    $prog{premiere} = [ '' ];
                    $prog{'_repeat'} = 0;
                }
            }
            if ($prog{'_repeat'}) {
                $prog{'previously-shown'} = {};
            }
            if ($prog{'_new_series'}) {
                $prog{new} = 1;
            }
            if ($prog{'_subtitles'}) {
                push @{$prog{subtitles}}, {type=>'teletext'};
            }
            if ($prog{'_deaf_signed'}) {
                push @{$prog{subtitles}}, {type=>'deaf-signed'};
            }
            if (defined $prog{'_certificate'} && $prog{'_certificate'} !~ m/^\s*$/) {
                $prog{rating} = [ [ $prog{'_certificate'}, 'BBFC' ] ];
            }
            if (defined $prog{'_star_rating'}  && $prog{'_star_rating'} !~ m/^\s*$/ && $prog{'_film'}) {
                push @{$prog{'star-rating'}}, [ "" . $prog{'_star_rating'} . "/5", 'Radio Times Film Rating' ];
            }
            if ($prog{'_choice'}) {
                push @{$prog{'star-rating'}}, [ '1/1', 'Radio Times Recommendation' ];
            }

            # Finally, write the programme's XML data to programme list
            push @programmes, \%prog;
            $num_titles++;
            t("\n  ----\n");
        }

        if ($num_titles < 1) {
            $empty_listings{$rt_name} = $rt_listings_uri;
            $chan_warnings++;
            t("  No programmes found for '$rt_name' - check source file");
        }
        else {
            # Write the channel's programme elements to output
            foreach my $prog (@programmes) {
                $writer->write_programme($prog);
            }
            t("  Writing $num_titles <programme> elements for '$rt_name'");
        }

        t("Finished processing listings for '$rt_name' ($xmltv_id)\n");
        t("----");

        # Update the progres bar by one increment
        if (defined $listings_bar) {
            $listings_bar->update();
        }
    }

    if (defined $listings_bar) {
        # Only update the progress bar to 100% if we need to
        if ($need_final_update) {
            $listings_bar->update($num_req_chans);
        }
        $listings_bar->finish();
        if (! $opt->{quiet}) {
            say("\n");
        }
    }
}

sub write_xmltv_footer {
    t("\nWriting XMLTV footer\n");
    $writer->end;
}

# Convenience method for use with XMLTV::Memoize. Only return content
# after a successful response. We require access to the raw octets via
# $resp->content in order to be able to process the data for double and
# mis-encoded UTF-8 content. Calling $resp->decoded_content or using
# LWP::Simple::get() (versions of LWP >=5.827) would not permit this.
sub get_octets {
    my $resp = $ua->get(shift @_);
    if ($resp->is_error) {
        return undef;
    }
    else {
        return $resp->content;
    }
}

# Return the digit equivalent of its word, i.e. "one" -> "1",
# or return the word if it appears to consist of only digits
sub word_to_digit {

    my $word = shift;
    return undef if ! defined $word;
    return $word if $word =~ m/\d+/;
    for (lc $word) {
        if    (m/^one$/)       { return 1 }
        elsif (m/^two$/)       { return 2 }
        elsif (m/^three$/)     { return 3 }
        elsif (m/^four$/)      { return 4 }
        elsif (m/^five$/)      { return 5 }
        elsif (m/^six$/)       { return 6 }
        elsif (m/^seven$/)     { return 7 }
        elsif (m/^eight$/)     { return 8 }
        elsif (m/^nine$/)      { return 9 }
        elsif (m/^ten$/)       { return 10 }
        elsif (m/^eleven$/)    { return 11 }
        elsif (m/^twelve$/)    { return 12 }
        elsif (m/^thirteen$/)  { return 13 }
        elsif (m/^fourteen$/)  { return 14 }
        elsif (m/^fifteen$/)   { return 15 }
        elsif (m/^sixteen$/)   { return 16 }
        elsif (m/^seventeen$/) { return 17 }
        elsif (m/^eighteen$/)  { return 18 }
        elsif (m/^nineteen$/)  { return 19 }
        elsif (m/^twenty$/)    { return 20 }
        # handle 1-8 in roman numberals
        elsif (m/^i$/)         { return 1 }
        elsif (m/^ii$/)        { return 2 }
        elsif (m/^iii$/)       { return 3 }
        elsif (m/^iv$/)        { return 4 }
        elsif (m/^v$/)         { return 5 }
        elsif (m/^vi$/)        { return 6 }
        elsif (m/^vii$/)       { return 7 }
        elsif (m/^viii$/)      { return 8 }
        # return undef if input unhandled
        else                  { return undef }
    }
}

# Display required copyright message from Radio Times
sub display_copyright {
    say("$rt_copyright");
}

# Read in the prog_titles_to_process file
sub load_prog_titles_to_process {
    my $prog_titles_to_process = undef;
    # Retrieve prog_titles_to_process via XMLTV::Supplement
    $prog_titles_to_process
            = GetSupplement("$grabber_name", 'prog_titles_to_process');


    if (defined $prog_titles_to_process) {

        $prog_titles_to_process = decode($source_encoding, $prog_titles_to_process);

        my @prog_titles_to_process = split /[\n\r]+/, $prog_titles_to_process;

        t("\nTitle processing information:\n");
        PROG_TITLE_ENTRY:
        foreach my $line (@prog_titles_to_process) {
            # Comments are allowed if they are at the start of the line
            next PROG_TITLE_ENTRY if ($line =~ m/^#/);
            my @fields = split /\|/, $line;
            # Each entry requires 2 fields
            if (scalar @fields != 2) {
                t("Wrong number of fields in XMLTV prog_titles_to_process entry:\n"
                        . "\t" . $line);
                next PROG_TITLE_ENTRY;
            }
            # The prog_titles_to_process fields are:
            # 1) procesing code
            # 2) title/non-title text to process
            #
            my ($code, $process_text) = @fields;
            if (! defined $code || $code eq '' || $code !~ m/\d+/) {
                t("Invalid title processing code: " . $line . "'");
                next PROG_TITLE_ENTRY;
            }

            if (! defined $process_text || $process_text eq ''
                                       || $process_text !~ m/\w+/) {
                t("Invalid title processing text: " . $line . "'");
                next PROG_TITLE_ENTRY;
            }

            my $idx_char = lc(substr ($process_text, 0, 1));

            # processing codes are documented in prog_titles_to_process file
            if ($code eq '1')  {
                my @fields = split( /~/, $process_text, -1);
                if (scalar @fields != 1) {
                    t("[1] Invalid number of fields (need 1) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                push @{$non_title_info{$idx_char}}, $process_text;
                t("[1] Will remove '" . $process_text . "' from title if found");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '2') {
                my @fields = split( /~/, $process_text, -1);
                if (scalar @fields != 1) {
                    t("[2] Invalid number of fields (need 1) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                push @{$mixed_title_subtitle{$idx_char}}, $process_text;
                t("[2] Will check for subtitle after title for '" . $process_text . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '3') {
                my @fields = split( /~/, $process_text, -1);
                if (scalar @fields != 1) {
                    t("[3] Invalid number of fields (need 1) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                push @mixed_subtitle_title, $process_text;
                t("[3] Will check for subtitle before title for '" . $process_text . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '4') {
                my @fields = split( /~/, $process_text, -1);
                if (scalar @fields != 1) {
                    t("[4] Invalid number of fields (need 1) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                push @{$reversed_title_subtitle{$idx_char}}, $process_text;
                t("[4] Will check for reversed title/subtitle for '" . $process_text . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '5') {
                my @fields = split( /~/, $process_text, -1);
                if (scalar @fields != 2) {
                    t("[5] Invalid number of fields (need 2) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                my( $old_title, $new_title ) = @fields;
                $replacement_titles{$old_title} = $new_title;
                t("[5] Will check for inconsistent title '" . $old_title . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '6') {
                my @fields = split( /~/, $process_text, -1);
                if (scalar @fields != 2) {
                    t("[6] Invalid number of fields (need 2) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                my( $uncat_title, $cat ) = @fields;
                if (exists $replacement_cats{$uncat_title}) {
                    t("[6] Duplicate category entry seen for title'" . $uncat_title . "'");
                    next PROG_TITLE_ENTRY;
                }
                $replacement_cats{$uncat_title} = $cat;
                t("[6] Will assign title '" . $uncat_title . "' to category '" . $cat . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '7') {
                my @fields = split( /~/, $process_text, -1);
                if (scalar @fields != 3) {
                    t("[7] Invalid number of fields (need 3) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                my( $ep_title, $old_ep, $new_ep ) = @fields;
                $replacement_episodes{$ep_title}->{$old_ep} = $new_ep;
                t("[7] Will check for inconsistent episode data '" . $old_ep . "' for title '" . $ep_title . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '8') {
                my @fields = split( /~/, $process_text, -1);
                if (scalar @fields != 4) {
                    t("[8] Invalid number of fields (need 4) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                foreach my $field (@fields) {
                    $field = "" if ! defined $field;
                }
                my( $old_title, $old_ep, $new_title, $new_ep ) = @fields;
                if ($old_title eq '' or $new_title eq '') {
                    t("[8] Ignoring fixup '" . $process_text . "' as old/new title not given");
                    next PROG_TITLE_ENTRY;
                }
                # remember old title so that we can output a debug list of
                # programmes that may also need to be handled via this fixup
                $flagged_title_eps{$old_title} = $old_title;

                my $key = ("" . $old_title . "|" . $old_ep);
                $replacement_title_eps{$key} = [$new_title, $new_ep];
                t("[8] Will update old title/subtitle '" . $old_title . " / " . $old_ep
                        . "' to '" . $new_title . " / " . $new_ep . "'");
                if ($old_title eq $new_title) {
                    t("[8]   Old/new title are the same - change to type 7 title fixup: '" . $process_text . "'");
                }
                if ($old_ep =~ m/^\Q$new_title\E/) {
                    t("[8]   Old ep contains new title - change to type 11 title fixup? '" . $process_text . "'");
                }

                # store titles that are being corrected with an existing "some title..." fixup
                # store the title without a leading "The" or "A" or the trailing "..." for later matching
                if ($new_title =~ m/^(?:The\s+|A\s+)?(.*)\.\.\.$/) {
                    $dotdotdot_titles{$1} = $new_title;
                }

                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '9') {
                my @fields = split( /~/, $process_text, -1);
                if (scalar @fields != 3) {
                    t("[9] Invalid number of fields (need 3) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                my( $title, $episode, $desc ) = @fields;
                $replacement_ep_from_desc{$title}->{$desc} = $episode;
                t("[9] Will update subtitle to '" . $episode . "' for title '" . $title
                        . "' based on given description '" . $desc . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '10') {
                my @fields = split( /~/, $process_text, -1);
                if (scalar @fields != 5) {
                    t("[10] Invalid number of fields (need 5) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                foreach my $field (@fields) {
                    $field = "" if ! defined $field;
                }
                my( $old_title, $old_ep, $new_title, $new_ep, $desc ) = @fields;
                if ($old_title eq '' or $new_title eq '' or $desc eq '') {
                    t("[10] Ignoring fixup '" . $process_text . "' as titles/desc not given");
                    next PROG_TITLE_ENTRY;
                }

                my $key = ("" . $old_title . "|" . $old_ep . "|" . $desc);
                $replacement_title_desc{$key} = [$new_title, $new_ep];
                t("[10] Will update old title/subtitle/desc '" . $old_title . " / " . $old_ep
                        . "' to '" . $new_title . " / " . $new_ep . "'");

                # store titles that are being corrected with an existing "some title..." fixup
                # store the title without a leading "The" or "A" or the trailing "..." for later matching
                if ($new_title =~ m/^(?:The\s+|A\s+)?(.*)\.\.\.$/) {
                    $dotdotdot_titles{$1} = $new_title;
                }

                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '11')  {
                my @fields = split( /~/, $process_text, -1);
                if (scalar @fields != 2) {
                    t("[11] Invalid number of fields (need 2) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                my( $brand, $new_title ) = @fields;
                if ($brand eq '' or $new_title eq '') {
                    t("[11] Ignoring fixup '" . $process_text . "' as brand/title not given");
                    next PROG_TITLE_ENTRY;
                }

                push @{$demoted_title{$brand}}, $new_title;
                t("[11] Will check for demoted title '" . $new_title . "' for brand '" . $brand . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '12') {
                my @fields = split( /~/, $process_text, -1);
                if (scalar @fields != 2) {
                    t("[12] Invalid number of fields (need 2) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                my( $film_title, $cat ) = @fields;
                $replacement_cats_film{$film_title} = $cat;
                t("[12] Will re-assign film '" . $film_title . "' to category '" . $cat . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '13') {
                my @fields = split( /~/, $process_text, -1);
                if (scalar @fields != 2) {
                    t("[13] Invalid number of fields (need 2) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                my( $title, $text_to_remove ) = @fields;
                push @{$subtitle_remove_text{$title}}, $text_to_remove;
                t("[13] Will remove text '" . $text_to_remove . "' from subtitle for title '" . $title . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '14') {
                my @fields = split( /~/, $process_text, -1);
                if (scalar @fields != 2) {
                    t("[13] Invalid number of fields (need 2) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                my( $source_role, $xmltv_role ) = @fields;
                $credits_role_map{lc $source_role} = lc $xmltv_role;
                t("[14] Will remap credits role from '" . $source_role . "' to '" . $xmltv_role . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            else {
                t("Unknown code seen in prog_titles_to_process file,"
                  . " skipping entry '" . $line . "'");
                next PROG_TITLE_ENTRY;
            }
        }
    }
    else {
        if (! $opt->{quiet}) {
            say("Disabling title processing, no information found.");
        }
    }
    if (! $opt->{quiet}) {
        say("\n");
    }
}

# Read in the utf8_fixups file
sub load_utf8_fixups {
    my $utf8_fixups = undef;
    # Retrieve utf8_fixups via XMLTV::Supplement
    $utf8_fixups
            = GetSupplement("$grabber_name", 'utf8_fixups');

    if (defined $utf8_fixups) {
        my @utf8_fixups = split /[\n\r]+/, $utf8_fixups;

        t("\nLoading UTF-8 fixups\n");
        UTF8_FIXUP_ENTRY:
        foreach my $line (@utf8_fixups) {
            # Comments are allowed if they are at the start of the line
            next UTF8_FIXUP_ENTRY if ($line =~ m/^#/);
            my @fields = split /\|/, $line;
            # Each entry requires 2 fields
            if (scalar @fields != 2) {
                t("Wrong number of fields in XMLTV UTF-8 fixup entry:\n"
                        . "\t" . $line);
                next UTF8_FIXUP_ENTRY;
            }

            # The utf8_fixups fields are:
            # 1) bad utf-8 characters to find and replace (as hex)
            # 2) the replacement characters (as hex)
            #
            my ($bad_chars, $replacement) = @fields;

            if (! defined $bad_chars || $bad_chars eq '') {
                t("Invalid UTF-8 fixup regex: '" . $line . "'");
                next UTF8_FIXUP_ENTRY;
            }
            if (! defined $replacement || $replacement eq '') {
                t("Invalid UTF-8 fixup replacement: '" . $line . "'");
                next UTF8_FIXUP_ENTRY;
            }
            # ignore unknown fixup formats
            if ($bad_chars !~ m/\\xEF\\xBF\\xBD/
                    && $bad_chars !~ m/\\xC3\\xAF\\xC2\\xBF\\xC2\\xBD/
                    && $bad_chars !~ m/^\\xC2\\x[8-9][0-9A-F]/) {
                t("Ignoring UTF-8 fixup: '" . $line . "'");
                next UTF8_FIXUP_ENTRY;
            }
            # Remove the \x chars read from the file leaving a simple hex string
            # containing only [0-9A-F] chars
            $replacement =~ s/\\x//g;
            # Now convert each byte (2 hex chars) into its character equivalent
            $replacement =~ s/([0-9A-F][0-9A-F])/chr(hex($1))/eig;

            # Create hashes to store each type of fixup separately. This should
            # improve processing speed by restricting number of fixups checked.
            if ($bad_chars =~ m/\\xEF\\xBF\\xBD/) {
                $utf8_fixups{'EFBFBD'}{$bad_chars} = $replacement;
            }
            elsif ($bad_chars =~ m/\\xC3\\xAF\\xC2\\xBF\\xC2\\xBD/) {
                $utf8_fixups{'C3AFC2BFC2BD'}{$bad_chars} = $replacement;
            }
            elsif ($bad_chars =~ m/^\\xC2\\x[8-9][0-9A-F]/) {
                $utf8_fixups{'C2809F'}{$bad_chars} = $replacement;
            }

            # Process the regex to get a character string to print. We use
            # the preserved hex string during processing
            my $bad_chars_chr = $bad_chars;
            $bad_chars_chr =~ s/\\x//g;
            $bad_chars_chr =~ s/([0-9A-F][0-9A-F])/chr(hex($1))/eig;
            t("UTF-8 fixup: will replace \"" . $bad_chars_chr . "\" with \""
                    . $replacement . "\" if seen");
            next UTF8_FIXUP_ENTRY;
        }
    }
    else {
        if (! $opt->{quiet}) {
            say("No additional UTF-8 fixups were found.");
        }
    }
    if (! $opt->{quiet}) {
        say("\n");
    }
}

# Tidy up any bad characters in the source data. Although the data is provided
# as UTF-8, the text may contain mis-encoded UTF-8 characters or the NULL
# or other extraneous characters which should be corrected where possible.
#
sub process_utf8_fixups {

    # read in the data to be processed, a descriptive name and a URI for it
    my $page = shift;
    my $rt_name = shift;
    my $rt_listings_uri = shift;

    t("  Checking '$rt_name' listings data for bad UTF-8 chars...");
    for ($page) {
        # Programme entries containing RT reviews or updated information
        # may have erroneous CR+SP characters which we tidy up here
        #
        t("    Looking for CR+SP characters...");
        if (s/\x0D\x20//g) {
            t("      Removed CR+SP characters from '$rt_name' listings data");
        }

        # Fix double-encoded UTF-8 characters (4 bytes)
        # =============================================
        #
        # The ISO-8859-1 charset contains 256 codepoints (0x00-0xFF). When
        # encoded into UTF-8, either 1 or 2 bytes are required to encode these
        # characters as follows:
        #
        # ISO-8859-1           UTF-8        Chars in    Bytes      Notes
        #    range         byte(s) range     Range     Required
        #
        #  0x00-0x1F     [00]-[1F]             32         1        Non-printing
        #  0x20-0x7F     [20]-[7F]             96         1        Printing
        #  0x80-0x9F     [C2][80]-[C2][9F]     32         2        Non-printing
        #  0xA0-0xBF     [C2][A0]-[C2][BF]     32         2        Printing
        #  0xC0-0xFF     [C3][80]-[C3][BF]     64         2        Printing
        #
        # A double-encoded UTF-8 character that uses 4 bytes (but should use
        # only 2 if properly encoded) uses the first 2 bytes to contain the
        # UTF-8 representation of the first byte of the proper UTF-8
        # representation of the character, and the second 2 bytes to contain
        # the UTF-8 representation of the second byte.
        #
        # E.g.:
        #
        # The data contains a double-encoded UTF-8 encoding of the A-grave
        # character using 4 bytes. The correct UTF-8 encoding of this character
        # is [C3][80]. The data actually contains the 4 bytes [C3][83][C2][80].
        # [C3][83] is the UTF-8 encoding of [C3], and [C2][80] is the UTF-8
        # encoding of [80]. We therefore replace this 4-byte double-encoding
        # with [C3][80] which is valid UTF-8 and can be successfully encoded
        # into other character encodings if required.
        #
        # The range of Unicode codepoints encoded into 2 bytes in UTF-8 lie in the
        # range [C2-DF][80-BF].
        #
        # http://en.wikipedia.org/wiki/ISO/IEC_8859-1
        # http://en.wikipedia.org/wiki/UTF-8
        # http://www.eki.ee/letter/
        #
        t("    Looking for double-encoded UTF-8 characters...");
        if (m/[\xC3][\x82-\x83][\xC2][\x80-\xBF]/) {
            # first capture each set of double-encoded UTF-8 bytes
            # (4 in total, 2 for each "real" UTF-8 char) into a list
            my @double_bytes = ($page =~ m/[\xC3][\x82-\x83][\xC2][\x80-\xBF]/g);

            # get a unique list of the different doubly encoded bytes
            my %unique_double_bytes;
            foreach(@double_bytes) {
                $unique_double_bytes{$_} = $_;
            }
            # Get a list of unique 4-byte sequences
            @double_bytes = sort values %unique_double_bytes;
            foreach (@double_bytes) {
                t("      Found double-encoded bytes: " . $_);
            }
            # process the list, reading 2 pairs of bytes in each iteration
            foreach (@double_bytes) {
                /([\xC3][\x82-\x83])([\xC2][\x80-\xBF])/;
                my $badbytes_1 = $1;
                my $badbytes_2 = $2;
                # convert each pair of bytes from UTF-8 to ISO-8859-1 to get a single
                # byte from the original pair
                my $goodbyte_1 = encode("iso-8859-1", decode("utf-8", $badbytes_1) );
                my $goodbyte_2 = encode("iso-8859-1", decode("utf-8", $badbytes_2) );
                # finally globally replace each group of 4 bad bytes with
                # the 2 correct replacement bytes
                $page =~ s/$badbytes_1$badbytes_2/$goodbyte_1$goodbyte_2/g;
                t("      Replaced bad bytes '" . $badbytes_1 . $badbytes_2
                                . "' with good bytes '" . $goodbyte_1 . $goodbyte_2 . "'");
            }
        }

        # Fix double-encoded UTF-8 General Punctuation characters (6 bytes)
        # =================================================================
        #
        # Occasionally in the listings we see double-encoded characters from
        # the Unicode General Punctuation range of characters. When encoded
        # into UTF-8 these characters should require 3 bytes. However, when
        # double-encoded they take 6 bytes. During their handling we replace
        # them with their ASCII equivalents which are how the characters are
        # usually included in the listings.
        #
        t("    Looking for double-encoded UTF-8 General Punctuation characters...");
        if (m/[\xC3][\xA2][\xC2][\x80-\x81]/) {
            t("      Replaced double-encoded 6-byte UTF-8 General Punctuation chars");
            s/\xC3\xA2\xC2\x80\xC2\x90/\x2D/g; # <2D> -> -
            s/\xC3\xA2\xC2\x80\xC2\x91/\x2D/g; # <2D> -> -
            s/\xC3\xA2\xC2\x80\xC2\x92/\x2D/g; # <2D> -> -
            s/\xC3\xA2\xC2\x80\xC2\x93/\x2D/g; # <2D> -> -
            s/\xC3\xA2\xC2\x80\xC2\x94/\x2D/g; # <2D> -> -
            s/\xC3\xA2\xC2\x80\xC2\x95/\x2D/g; # <2D> -> -
            s/\xC3\xA2\xC2\x80\xC2\x98/\x27/g; # <27> -> '
            s/\xC3\xA2\xC2\x80\xC2\x99/\x27/g; # <27> -> '
            s/\xC3\xA2\xC2\x80\xC2\x9A/\x27/g; # <27> -> '
            s/\xC3\xA2\xC2\x80\xC2\x9C/\x22/g; # <22> -> "
            s/\xC3\xA2\xC2\x80\xC2\x9D/\x22/g; # <22> -> "
            s/\xC3\xA2\xC2\x80\xC2\x9E/\x22/g; # <22> -> "
            s/\xC3\xA2\xC2\x80\xC2\x9F/\x22/g; # <22> -> "
            s/\xC3\xA2\xC2\x80\xC2\xA6/\x2E\x2E\x2E/g; # <2E><2E><2E> -> ...
        }

        # Fix mis-encoded UTF-8 characters (6/8 bytes)
        # ============================================
        #
        # Frequently seen in the data (especially in film listings) are completely
        # mis-encoded sequences of UTF-8 characters. Each sequence of bad bytes
        # starts with a correctly encoded 2 byte UTF-8 character but it then
        # followed by 2 or 3 mis-encoded ASCII-range characters. When encoded into
        # UTF-8 these ASCII chars should take 1 byte each, but in this situation
        # use 2 bytes which then fail to decode or display correctly.
        #

        # This fixup looks for mis-encoded character sequences in the range
        # [C3][A0-AF][C2][80-BF][C2][80-BF] (6 bytes)
        #
        t("    Looking for mis-encoded [C3][A0-AF] bytes...");
        if (m/[\xC3][\xA0-\xAF][\xC2][\x80-\xBF][\xC2][\x80-\xBF]/) {
            # first capture each sequence of mis-encoded UTF-8 bytes
            # (6 in total)
            my @misencoded_bytes =
                    ($page =~ m/[\xC3][\xA0-\xAF][\xC2][\x80-\xBF][\xC2][\x80-\xBF]/g);
            # get a unique list of the different mis-encoded byte sequences
            my %unique_misencoded_bytes;
            MIS_ENC_BYTE:
            foreach (@misencoded_bytes) {
                # the Unicode Replacement Character is handled below, so ignore here
                # (when double-encoded, it will match the regex above)
                if (m/\xC3\xAF\xC2\xBF\xC2\xBD/) {
                    t("      Ignoring double-encoded Unicode Replacement Character (handled separately)");
                    next MIS_ENC_BYTE;
                }
                $unique_misencoded_bytes{$_} = $_;
            }
            # Get a new list of the unique 6-byte sequences
            @misencoded_bytes = sort values %unique_misencoded_bytes;
            foreach (@misencoded_bytes) {
                t("      Found mis-encoded bytes: " . $_);
            }
            # process the list, reading 4 bytes in each iteration. Bytes
            # 1 and 2 are correct and left untouched, bytes 4 and 6 are
            # extracted and corrected before being output
            foreach (@misencoded_bytes) {
                /([\xC3][\xA0-\xAF])[\xC2]([\x80-\xBF])[\xC2]([\x80-\xBF])/;
                my $goodbytes = $1; # correct, and used in replacement
                my $badbyte_1 = $2; # incorrect byte value
                my $badbyte_2 = $3; # incorrect byte value
                # the bad bytes require 0x40 (DEC 64) to be subtracted from the char
                # value. 0xA0 are a special case and always converted to regular
                # space char (0x20)
                my $goodbyte_1;
                if ($badbyte_1 !~ m/\xA0/) {
                    $goodbyte_1 = chr( (ord $badbyte_1) - 64);
                }
                else {
                    $goodbyte_1 = "\x20";
                }
                my $goodbyte_2;
                if ($badbyte_2 !~ m/\xA0/) {
                    $goodbyte_2 = chr( (ord $badbyte_2) - 64);
                }
                else {
                    $goodbyte_2 = "\x20";
                }
                # finally globally replace each sequence of bad bytes with
                # the correct replacement bytes
                $page =~ s/$_/$goodbytes$goodbyte_1$goodbyte_2/g;
                t("      Replaced mis-encoded [C3][A0-AF] bytes '" . $_
                                . "' with bytes '"
                                . $goodbytes . $goodbyte_1 . $goodbyte_2 . "'");
            }
        }

        # This fixup looks for mis-encoded character sequences in the range
        # [C3][B0-BF][C2][80-BF][C2][80-BF][C2][80-BF] (8 bytes)
        #
        t("    Looking for mis-encoded [C3][B0-BF] bytes...");
        if (m/[\xC3][\xB0-\xBF][\xC2][\x80-\xBF][\xC2][\x80-\xBF][\xC2][\x80-\xBF]/) {
            # first capture each sequence of mis-encoded UTF-8 bytes
            # (8 in total)
            my @misencoded_bytes =
                    ($page =~ m/[\xC3][\xB0-\xBF][\xC2][\x80-\xBF][\xC2][\x80-\xBF][\xC2][\x80-\xBF]/g);
            # get a unique list of the different mis-encoded byte sequences
            my %unique_misencoded_bytes;
            foreach(@misencoded_bytes) {
                $unique_misencoded_bytes{$_} = $_;
            }
            # Get a new list of the unique 8-byte sequences
            @misencoded_bytes = sort values %unique_misencoded_bytes;
            foreach (@misencoded_bytes) {
                t("      Found mis-encoded bytes: " . $_);
            }
            # process the list, reading 5 bytes in each iteration. Bytes
            # 1 and 2 are correct and left untouched, bytes 4, 6 and 8 are
            # extracted and corrected before being output
            foreach (@misencoded_bytes) {
                /([\xC3][\xB0-\xBF])[\xC2]([\x80-\xBF])[\xC2]([\x80-\xBF])[\xC2]([\x80-\xBF])/;
                my $goodbytes = $1; # correct, and used in replacement
                my $badbyte_1 = $2; # incorrect byte value
                my $badbyte_2 = $3; # incorrect byte value
                my $badbyte_3 = $4; # incorrect byte value
                # the bad bytes require 0x40 (DEC 64) to be subtracted from the char
                # value. 0xA0 are a special case and always converted to regular
                # space char (0x20)
                my $goodbyte_1;
                if ($badbyte_1 !~ m/\xA0/) {
                    $goodbyte_1 = chr( (ord $badbyte_1) - 64);
                }
                else {
                    $goodbyte_1 = "\x20";
                }
                my $goodbyte_2;
                if ($badbyte_2 !~ m/\xA0/) {
                    $goodbyte_2 = chr( (ord $badbyte_2) - 64);
                }
                else {
                    $goodbyte_2 = "\x20";
                }
                my $goodbyte_3;
                if ($badbyte_3 !~ m/\xA0/) {
                    $goodbyte_3 = chr( (ord $badbyte_3) - 64);
                }
                else {
                    $goodbyte_3 = "\x20";
                }
                # finally globally replace each sequence of bad bytes with
                # the correct replacement bytes
                $page =~ s/$_/$goodbytes$goodbyte_1$goodbyte_2$goodbyte_3/g;
                t("      Replaced mis-encoded [C3][B0-BF] bytes '" . $_
                                . "' with bytes '"
                                . $goodbytes . $goodbyte_1 . $goodbyte_2 . $goodbyte_3 . "'");
            }
        }

        # Manual Replacements
        # ===================
        #
        # Here we replace specific sequences of characters seen in the source
        # data that cannot be handled automatically above. These include
        # occurences of the Unicode Replace Character (single and double
        # encoded) and other mis-encoded characters.
        #
        # We use a supplemental file to store these fixups to allow updating
        # without needing to update the grabber itself.
        #
        if ($utf8_fixups_status eq 'enabled') {

            # Unicode Replacement Character (U+FFFD)
            # ======================================
            #
            # The UTF-8 source data may also contain the bytes [EF][BF][BD] which
            # are the UTF-8 encoding of the Unicode Replacement Character U+FFFD.
            # It is likely that these are introduced during preparation of the
            # listings data by the Radio Times, as any characters that cannot be
            # understood are replaced by this character.
            #
            t("    Looking for Unicode Replacement Character...");
            if (m/\xEF\xBF\xBD/) {
                if (%utf8_fixups && exists $utf8_fixups{'EFBFBD'}) {
                    foreach my $bad_chars (keys %{$utf8_fixups{'EFBFBD'}}) {
                        my $replacement = $utf8_fixups{'EFBFBD'}{$bad_chars};
                        # Search for the regex string and replace with char string
                        if ($page =~ s/$bad_chars/$replacement/g) {
                            t("      Replaced Unicode Replacement Character with \""
                                    . $replacement . "\"");
                        }
                    }
                }
                if ($page =~ s/\xEF\xBF\xBD/\x3F/g) {
                    t("    After fixups, data for '$rt_name' still contains Unicode "
                            . "Replacement character. Replaced with \"?\"\n");
                    $hadEFBFBD{$rt_name} = $rt_listings_uri;
                }
            }

            # Double-encoded Unicode Replacement Character (6 bytes)
            # ======================================================
            #
            # The correct encoding for the Unicode Replacement Character is
            # [EF][BF][BD], however it has been seen double-encoded in the listings
            # data as [C3][AF][C2][BF][C2][BD]. As with the normal replacement
            # character, there is no way to determine which replacement character
            # to use in this case, so we substitute a '?' char if we cannot handle
            # the specific occurence. This error needs to have been seen at least
            # once in source data to be able to construct a suitable fixup.
            #
            t("    Looking for double-encoded Unicode Replacement Character...");
            if (m/\xC3\xAF\xC2\xBF\xC2\xBD/) {
                if (%utf8_fixups && exists $utf8_fixups{'C3AFC2BFC2BD'}) {
                    foreach my $bad_chars (keys %{$utf8_fixups{'C3AFC2BFC2BD'}}) {
                        my $replacement = $utf8_fixups{'C3AFC2BFC2BD'}{$bad_chars};
                        # Search for the regex string and replace with char string
                        if ($page =~ s/$bad_chars/$replacement/g) {
                            t("      Replaced double-encoded Unicode Replacement Character with \""
                                    . $replacement . "\"");
                        }
                    }
                }
                if ($page =~ s/\xC3\xAF\xC2\xBF\xC2\xBD/\x3F/g) {
                    t("    After fixups, data for '$rt_name' still contains "
                        . "double-encoded Unicode Replacement character. "
                        . "Replaced with \"?\"\n");
                    $hadC3AFC2BFC2BD{$rt_name} = $rt_listings_uri;
                }
            }

            # Mis-encoded characters in range [C2][80-9F]
            # ===========================================
            #
            # Single characters that are seen in the source data as bytes in the
            # range [C2][80-9F] that UTF-8 decode as non-printing characters
            # instead of their intended character.
            #
            t("    Looking for mis-encoded characters in range [C2][80-9F]...");
            if (m/\xC2[\x80-\x9F]/) {
                if (%utf8_fixups && exists $utf8_fixups{'C2809F'}) {
                    foreach my $bad_chars (keys %{$utf8_fixups{'C2809F'}}) {
                        my $replacement = $utf8_fixups{'C2809F'}{$bad_chars};
                        # Search for the regex string and replace with char string
                        if ($page =~ s/$bad_chars/$replacement/g) {
                            t("      Replaced mis-encoded characters \"" . $bad_chars
                                    . "\" with \"". $replacement . "\"");
                        }
                    }
                }
            }
        }

        # With manual replacements handled above, finally remove any
        # remaining bad/non-printing characters we find

        # Replacements for specific strings seen in source data
        # =====================================================
        #
        t("    Looking for specific strings to replace...");
        # Replacement for Pound Sterling symbol seen as {pound}
        if (s/\x7B\x70\x6F\x75\x6E\x64\x7D/\xC2\xA3/g) {
            t("      Replaced \"{pound}\" with Pound Sterling symbol");
        }
        # Replace any non-breaking (NBSP) space chars with regular spaces
        if (s/\xC2\xA0/\x20/g) {
            t("      Replaced non-breaking spaces with regular spaces");
        }

        # Remove any remaining non-printing control characters (keep
        # \t \n and \r). Refer to above table for ISO-8859-1 and UTF-8 Unicode
        # encodings for chars.
        #
        # First, chars in UTF-8 range [00-1F] (ISO-8859-1 range [00-1F])
        if (s/[\x00-\x08\x0B-\x0C\x0E-\x1F]//g) {
            t("    Removed non-printing characters (range [00]-[1F]) from "
                    . "'$rt_name' listings data");
        }

        # Next, remove any remaining byte pairs in UTF-8 range [C2][7F-9F]
        # (ISO-8859-1 range [7F-9F]) (non-printing)
        if (s/[\xC2][\x7F-\x9F]//g) {
            t("    Removed non-printing characters (range [C2][7F-9F]) from "
                    . "'$rt_name' listings data");
            $hasC27F9Fchars{$rt_name} = $rt_listings_uri;
        }
    }

    return $page;
}

# Validate the key %prog fields (title/date/time) for a programme
sub validate_key_fields {
    my $prog = shift;

    if (! defined $prog->{'_title'}) {
        t("  Missing title in entry, skipping");
        return undef;
    }
    if (! defined $prog->{'_date'}) {
        t("  Missing date in entry, skipping");
        return undef;
    }
    if (! defined $prog->{'_start'}) {
        t("  Missing start time in entry, skipping");
        return undef;
    }
    if (! defined $prog->{'_duration_mins'}) {
        t("  Missing duration in entry, skipping");
        return undef;
    }
    if ($prog->{'_date'} !~ m{^\d\d/\d\d/\d{4}$}) {
        t("  Bad date '" . $prog->{'_date'} . "' detected for '" . $prog->{'_title'} . "', skipping");
        return undef;
    }
    if ($prog->{'_start'} !~ m/\d\d:\d\d/) {
        t("  Bad start time '" . $prog->{'_start'} . "' detected for '" . $prog->{'_title'} . "', skipping");
        return undef;
    }
    if ($prog->{'_duration_mins'} !~ m/\d+/) {
        t("  Bad duration '" . $prog->{'_duration'} . "' detected for '" . $prog->{'_title'} . "', skipping");
        return undef;
    }
    if ($prog->{'_duration_mins'} == 0) {
        t("  Zero duration detected for '" . $prog->{'_title'} . "', skipping");
        return undef;
    }
    return $prog;
}

# Check boolean fields for valid data. Update 'true'/'false'
# strings to '1'/'0' values
sub validate_boolean_field {
    my $prog = shift;
    my $field = shift;

    if (! defined $prog->{$field}) {
        t("  A required true/false value was undefined for '"
                . $prog->{'_title'} . "', skipping");
        return undef;
    }
    if ($prog->{$field} !~ m/(true|false)/i) {
        t("  A bad true/false value '$prog->{$field}' was seen for '"
                . $prog->{'_title'} . "', skipping");
        return undef;
    }

    $prog->{$field} = 1 if $prog->{$field} eq 'true';
    $prog->{$field} = 0 if $prog->{$field} eq 'false';

    return $prog;
}

# Check for any DST-related information the RT may include in the title
# for a programme. If we find any explicit DST information we store it
# for use later and remove it from the title.
sub check_explicit_tz_in_title {
    my $prog = shift;

    if ($prog->{'_title'} =~ s/^\((GMT|BST)\)\s*//) {
        $prog->{'_explicit_tz'} = $1;
    }
}

# Remove any last-minute scheduling info inserted into regular
# description that will affect later regexes.
sub remove_updated_listing_desc {
    my $prog = shift;

    if (defined $prog->{'_desc'}) {
        $prog->{'_desc'} =~ s/\s+/ /g;
        if ($prog->{'_desc'} =~ s/\s?(?:UPDATED|UPADTED)\s+LISTING\s?(?:-|:|@)\s?(.*)$//i) {
            $prog->{'_updated_listing_info'} = $1;
            t("  Removed updated listing information:\n" . "    '" . $1 . "'");
        }
    }
}

# Episode/series numbering is provided in the sub_title field in the source
# data, which is parsed out if seen. Retain episode $sub_title data if
# $episode contains only episode numbering.
sub check_numbering_in_subtitle {
    my $prog = shift;

    if (defined $prog->{'_sub_title'}) {
        extract_numbering_from_sub_title($prog);

        # sub_title should be empty after successful parsing
        if ($prog->{'_sub_title'} eq '') {
            $prog->{'_sub_title'} = undef;
        }
        # text left in sub_title is most likely _episode info, so move it to _episode
        else {
            if (! defined $prog->{'_episode'}) {
                t("  Using sub-title '" . $prog->{'_sub_title'} . "' as episode not given");
                $prog->{'_episode'} = $prog->{'_sub_title'};
                $prog->{'_sub_title'} = undef;
            }
            else {
                t("  Merging episode '" . $prog->{'_episode'} . "' with sub_title '" . $prog->{'_sub_title'} . "'");
                $prog->{'_episode'} = $prog->{'_episode'} . ": " . $prog->{'_sub_title'};
                $prog->{'_sub_title'} = undef;
            }
        }
    }
}

# Check for null or invalid release year
sub validate_year_field {
    my $prog = shift;

    if (defined $prog->{'_year'}) {
        if ($prog->{'_year'} =~ m/null/i) {
            t("  Null release year given for this programme.");
            $prog->{'_year'} = undef;
        }
        elsif ($prog->{'_year'} !~ m/\d{4}/) {
            t("  Invalid release year given for this programme.");
            $prog->{'_year'} = undef;
        }
    }
}

# Remove production year from $episode for films only
# If we do not already have a valid prod year, use the year
# detected
sub remove_year_from_episode {
    my $prog = shift;

    if (defined $prog->{'_episode'}) {
        if ($prog->{'_film'} && $prog->{'_episode'} =~ s/Prod Year (\d{4})//i) {
            t("  Removed production year info from episode details");
            $prog->{'_episode'} = undef;
            if (! defined $prog->{'_year'}) {
                $prog->{'_year'} = $1;
            }
        }
    }
}

# Remove bad chars from $title text before further processing
sub tidy_title_text {
    my $prog = shift;

    # remove vertical bar/colon
    if ($prog->{'_title'} =~ s/([|:])$//) {
        t("  Removed '" . $1 . "' from end of title");
    }
}

# Some listings appear to be added without being processed upstream
# to provide subtitle (episode) information. The titles of these
# programmes are uppercase and may contain season numbering. Here
# we monitor these before further title processing is carried out.
sub check_uppercase_titles {
    my $prog = shift;

    if ($opt->{debug} && ($prog->{'_title'} eq uc($prog->{'_title'}))) {
        $uc_prog_titles{$prog->{'_title'}} = $prog->{'_title'};
    }
}

# Remove "New $title" from episode field
#
# Listings for programmes may be provided that contain
# "New $title" duplicated at the start of the episode field
#
sub remove_duplicated_new_title_in_ep {
    my $prog = shift;

    if (defined $prog->{'_episode'}) {
        my $tmp_title = $prog->{'_title'};
        my $tmp_episode = $prog->{'_episode'};
        my $key = $tmp_title . "|" . $tmp_episode;

        # Remove the "New $title" text from episode field if we find it
        if ($tmp_episode =~ m/^New \Q$tmp_title\E(?::\s|\s-\s)(.+)$/) {
            $prog->{'_episode'} = $1;
            t("      Removing 'New \$title' text from beginning of episode field");
            if ($opt->{debug}) {
                $new_title_in_subtitle_fixed{$key} = { 'title'    => $tmp_title,
                                                       'episode'  => $tmp_episode,
                                                     };
            }
        }
    }
}

# Remove duplicated programme title *and* episode from episode field
#
# Listings for programmes may be provided that contain the
# programme title *and* episode duplicated in the episode field:
# i) at the start separated from the episode by colon - $title: $episode: $episode
sub remove_duplicated_title_and_ep_in_ep {
    my $prog = shift;

    if (defined $prog->{'_episode'}) {
        my $tmp_title = $prog->{'_title'};
        my $tmp_episode = $prog->{'_episode'};
        my $key = $tmp_title . "|" . $tmp_episode;

        # Remove the duplicated title/ep from episode field if we find it
        # Use a backreference to match the second occurence of the episode text
        if ($tmp_episode =~ m/^\Q$tmp_title\E:\s(.+):\s\1$/) {
            $prog->{'_episode'} = $1;
            t("      Removing duplicated title/ep text from episode field");
            if ($opt->{debug}) {
                $title_ep_in_subtitle_fixed{$key} = { 'title'    => $tmp_title,
                                                      'episode'  => $tmp_episode,
                                                    };
            }
        }
    }
}

# Remove duplicated programme title from episode field
#
# Listings for programmes may be provided that contain the
# programme title duplicated in the episode field, either:
# i) at the start followed  by the 'real' episode in parentheses (rare);
# ii) at the start separated from the episode by a colon/hyphen; or
# iii) at the end separated from the episode by a colon/hyphen
#
sub remove_duplicated_title_in_ep {
    my $prog = shift;

    if (defined $prog->{'_episode'}) {
        my $tmp_title = $prog->{'_title'};
        my $tmp_episode = $prog->{'_episode'};
        my $key = $tmp_title . "|" . $tmp_episode;

        # Remove the duplicated title from episode field if we find it
        if ($tmp_episode =~ m/^\Q$tmp_title\E(?::\s|\s-\s)(.+)$/ || $tmp_episode =~ m/^\Q$tmp_title\E\s+\((.+)\)$/) {
            $prog->{'_episode'} = $1;
            t("      Removing title text from beginning of episode field");
            if ($opt->{debug}) {
                $title_in_subtitle_fixed{$key} = { 'title'    => $tmp_title,
                                                   'episode'  => $tmp_episode,
                                                 };
            }
        }
        # Look for title appearing at end of episode field
        elsif ($tmp_episode =~ m/^(.+)(?::\s|\s-\s)\Q$tmp_title\E$/) {
            $prog->{'_episode'} = $1;
            t("      Removing title text from end of episode field");
            if ($opt->{debug}) {
                $title_in_subtitle_fixed{$key} = { 'title'    => $tmp_title,
                                                   'episode'  => $tmp_episode,
                                                 };
            }
        }
    }
}

# Process programme against supplemental title fixups
sub process_title_fixups {
    my $prog = shift;

    # Remove non-title text found in programme title.
    #
    # Applied to all titles in the source data (type = 1)
    process_non_title_info($prog);

    # Track when titles/subtitles have been updated to allow
    # short-circuiting of title processing
    $prog->{'_titles_processed'} = 0;
    $prog->{'_subtitles_processed'} = 0;

    # Next, process titles to make them consistent
    #
    # One-off demoted title replacements (type = 11)
    if (! $prog->{'_titles_processed'}) {
        process_demoted_titles($prog);
    }
    # One-off title and episode replacements (type = 10)
    if (! $prog->{'_titles_processed'}) {
        process_replacement_titles_desc($prog);
    }
    # One-off title and episode replacements (type = 8)
    if (! $prog->{'_titles_processed'}) {
        process_replacement_titles_episodes($prog);
    }
    # Look for $title:$episode in source title (type = 2)
    if (! $prog->{'_titles_processed'}) {
        process_mixed_title_subtitle($prog);
    }
    # Look for $episode:$title in source title (type = 3)
    if (! $prog->{'_titles_processed'}) {
        process_mixed_subtitle_title($prog);
    }
    # Look for reversed title and subtitle information (type = 4)
    if (! $prog->{'_titles_processed'}) {
        process_reversed_title_subtitle($prog);
    }
    # Look for inconsistent programme titles (type = 5)
    #
    # This fixup is applied to all titles (processed or not) to handle
    # titles split out in fixups of types 2-4 above
    process_replacement_titles($prog);

    # Next, process subtitles to make them consistent
    #
    # Remvoe text from programme subtitles (type = 13)
    if (! $prog->{'_subtitles_processed'}) {
        process_subtitle_remove_text($prog);
    }
    # Look for inconsistent programme subtitles (type = 7)
    if (! $prog->{'_subtitles_processed'}) {
        process_replacement_episodes($prog);
    }
    # Replace subtitle based on description (type = 9)
    if (! $prog->{'_subtitles_processed'}) {
        process_replacement_ep_from_desc($prog);
    }

    # Last, provide/update a programme's category based on 'corrected' title
    # (types = 6,12)
    process_replacement_genres($prog);
}

# Check for potential episode numbering that still remains in the title
# or episode fields
sub check_potential_numbering_in_text {
    my $prog = shift;

    extract_numbering_from_episode($prog);
    extract_numbering_from_title($prog);
    extract_part_numbering_from_episode($prog);

    # after processing see if $title contains "season" text that should
    # probably be removed
    if ($opt->{debug} && $prog->{'_title'} =~ m/season/i) {
        t("      Title text contains \"Season\":  " . $prog->{'_title'});
        $title_text_to_remove{$prog->{'_title'}} = $prog->{'_title'};
    }

    # after processing see if $episode contains "series" text
    if ($opt->{debug} && defined $prog->{'_episode'} && $prog->{'_episode'} =~ m/series/i) {
        t("      Possible series numbering still seen:  " . $prog->{'_episode'});
        $possible_series_nums{$prog->{'_episode'}} = $prog->{'_episode'};
    }

    # check for potential episode numbering left unprocessed
    if ($opt->{debug} && defined $prog->{'_episode'}
            && ($prog->{'_episode'} =~ m/^\d{1,2}\D/ || $prog->{'_episode'} =~ m/\D\s+\d{1,2}$/)
            && $prog->{'_episode'} !~ m/(Part|Pt(\.)?)(\d+|\s+\w+)/
            && $prog->{'_episode'} !~ m/^\d\d\/\d\d\/\d\d(\d\d)?/) {
        t("      Possible episode numbering still seen: " . $prog->{'_episode'});
        $possible_episode_nums{$prog->{'_episode'}} = $prog->{'_episode'};
    }
}

# Set $episode to undefined if empty/whitespace
sub tidy_episode_text {
    my $prog = shift;

    if (defined $prog->{'_episode'} && $prog->{'_episode'} =~ m/^\s*$/) {
        $prog->{'_episode'} = undef;
    }
}

# Store a variety of title debugging information for later analysis
# and debug output
sub store_title_debug_info {
    my $prog = shift;

    if ($opt->{debug}) {
        # Monitor for case/punctuation-insensitive title variations
        my $title_nopunc = lc $prog->{'_title'};
        $title_nopunc =~ s/^the\s+//;
        $title_nopunc =~ s/(\s+and\s+|\s+&\s+)/ /g;
        $title_nopunc =~ s/\s+No 1s$//g;
        $title_nopunc =~ s/\s+No 1's$//g;
        $title_nopunc =~ s/\s+Number Ones$//g;
        $title_nopunc =~ s/' //g;
        $title_nopunc =~ s/'s/s/g;
        $title_nopunc =~ s/\W//g;
        # count number of each variant by genre and channel name
        my $tmp_genre;
        $tmp_genre = $prog->{'_genre'}; $tmp_genre = "No Genre" if not defined $tmp_genre;
        $case_insens_titles{$title_nopunc}{ $prog->{'_title'} }{$tmp_genre}{ $prog->{'_rt_name'} }++;
        $case_insens_titles{$title_nopunc}{ $prog->{'_title'} }{'count'}++;

        # Check for title text still present in episode details
        if (defined $prog->{'_episode'} && $prog->{'_episode'} =~ m/^\Q$prog->{'_title'}\E.*$/) {
            my $key = $prog->{'_title'} . "|" . $prog->{'_episode'};
            $title_in_subtitle_notfixed{$key} = { 'title'    => $prog->{'_title'},
                                                  'episode'  => $prog->{'_episode'},
                                                };
        }

        # Check for episode details that contain a colon/hyphen - these may indicate
        # a title in the episode field that needs to be moved into the title field
        if (defined $prog->{'_episode'} && $prog->{'_episode'} =~ m/(:|\s+-\s+)/) {
            my $key = $prog->{'_title'} . "|" . $prog->{'_episode'};
            $colon_in_subtitle{$key} = { 'title'    => $prog->{'_title'},
                                         'episode'  => $prog->{'_episode'},
                                        };
        }

        # Add title to the list of programme titles for later debugging.
        # Likewise for films, but store in separate hash.
        if (! $prog->{'_film'}) {
            $prog_titles{$prog->{'_title'}} = $prog->{'_title'};
        }
        else {
            $film_titles{$prog->{'_title'}} = $prog->{'_title'};
        }
    }
}

# Occasionally film listings contain the title duplicated in the
# $episode field, so we remove it here
sub check_duplicated_film_title {
    my $prog = shift;

    if ($prog->{'_film'} && defined $prog->{'_episode'}
                         && (uc $prog->{'_title'} eq uc $prog->{'_episode'})) {
        $prog->{'_episode'} = undef;
    }
}

# Check for films without a valid release year (absent or null string
# seen in source data)
sub check_missing_film_year {
    my $prog = shift;

    if ($prog->{'_film'} && ! defined $prog->{'_year'}) {
        t("    No release year given for this film.");
    }
}

# Tidy description text
sub tidy_desc_text {
    my $prog = shift;
    if (defined $prog->{'_desc'}) {
        $prog->{'_desc'} =~ s/\s+/ /g;
    }
}

# Update the premiere/repeat flags based on contents of programme desc
sub update_premiere_repeat_flags_from_desc {
    my $prog = shift;

    if (defined $prog->{'_desc'}) {
        # Check if desc start with "Premiere.". Remove if found and set flag
        if ($prog->{'_desc'} =~ s/^Premiere\.\s*//) {
            t("    Setting premiere flag based on description (Premiere. )");
            $prog->{'_premiere'} = 1;
        }

        # Flag showings described as repeats
        elsif ($prog->{'_desc'} =~ m/^Another chance/) {
            t("    Setting repeat flag based on description (Another chance...)");
            $prog->{'_repeat'} = 1;
        }

        # Check if desc starts with "New series..."
        elsif ($prog->{'_desc'} =~ m/^New series/) {
            t("    Setting premiere flag based on description (New series...)");
            $prog->{'_premiere'} = 1;

            # Now check if desc starts with "New series [(x/y)]. "
            # Remove text and preserve numbering for processing below
            if ($prog->{'_desc'} =~ m/^New series(\s*\d+\/\d+\s*)?\.\s*/i) {
                $prog->{'_desc'} =~ s/^New series\s*//i;
                $prog->{'_desc'} =~ s/^\s*\.\s*//i;
            }
        }
    }
}

# Check for potential season/episode numbering in description. Only
# use numbering found in the desc if we have not already found it
# elsewhere (i.e. prefer data provided in the subtitle field of the
# raw data).
#
sub extract_numbering_from_desc {
    my $prog = shift;

    if (defined $prog->{'_desc'}) {
        # Extract episode and series info from start of description
        # "1/6. ..."
        # "1/6; series one. ..."
        if ($prog->{'_desc'} =~
            s{
            ^                    # start at beginning of episode details
            (\d+)                # CAPTURE the first number(s) found ($episode_num)
            \s*                  # ignore any whitespace
            (?:&\d+)?            # check for "&2" details relating to following episode
            \s*                  # ignore any whitespace
            \/                   # forward slash
            \s*                  # ignore any whitespace
            (\d+)                # CAPTURE the second number(s) found ($num_episodes)
            \s*                  # ignore any whitespace
            (?:\.|;)?            # check for punctuation characters
            \s*                  # ignore any whitespace
            (?:series\s+(\w+)\s*\.)?
                                # check for series number information ($series_num)
            \s*                  # ignore any whitespace
            }
            {}ix ) {
                my $updated_from_desc = 0;
                if ( ! defined $prog->{'_episode_num'} && ! defined $prog->{'_num_episodes'} ) {
                    $prog->{'_episode_num'} = $1 - 1;
                    $prog->{'_num_episodes'} = $2;
                    t("    Episode number found: episode $1 of $2 (desc)");
                    $updated_from_desc++;
                }
                else {
                    t("    Ignoring episode numbering seen in desc (episode $1 of $2)");
                }
                if ( defined $3 ) {
                    my $series_digits = word_to_digit($3);

                    if ( ! defined $prog->{'_series_num'} ) {
                        if (defined $series_digits and $series_digits > 0) {
                            $prog->{'_series_num'} = $series_digits - 1;
                            t("    Series number found: series $series_digits (parsed as $3 from desc)");
                            $updated_from_desc++;
                        }
                    }
                    else {
                        t("    Ignoring series numbering seen in desc (series $series_digits)");
                    }
                }
                $prog->{'_processed'} = 1 if $updated_from_desc;
        }
    }
}

# Create episode numbering based on information extracted from $episode
# and $desc fields.
sub generate_episode_numbering {
    my $prog = shift;

    # series number is zero-indexed
    if (! defined $prog->{'_series_num'} || $prog->{'_series_num'} < 0) {
        $prog->{'_series_num'} = '';
    }
    # episode number is zero-indexed
    if (! defined $prog->{'_episode_num'} || $prog->{'_episode_num'} < 0) {
        $prog->{'_episode_num'} = '';
    }
    # episode total is one-indexed and should always be greater than the
    # max episode number (which is zero-indexed)
    if (defined $prog->{'_num_episodes'}
                && $prog->{'_num_episodes'} > 0
                && $prog->{'_num_episodes'} > $prog->{'_episode_num'} ) {
        $prog->{'_num_episodes'} = "/" . $prog->{'_num_episodes'};
    }
    else {
        $prog->{'_num_episodes'} = '';
    }
    # Create details if we have the series and/or episode numbers
    if ($prog->{'_series_num'} ne '' || $prog->{'_episode_num'} ne '') {
        return "" . $prog->{'_series_num'}   . "." . $prog->{'_episode_num'}
                  . $prog->{'_num_episodes'} . "." . "";
    }
    return undef;
}

# Create cast list based on various cast formats seen in source data
sub generate_cast_list {
    my $p = shift;
    my $cast = $p->{'_cast'};

    # The Radio Times data includes cast information in 2 formats:
    #
    # a) pairings of 'character*actor' with subsequent pairings
    #    separated by '|' - '*' does not appear in any text
    # b) a comma separated list of actors with no character details
    #
    # If 'Director' appears in the character entry, this is to be used
    # as a regular cast member, not the programme's director
    if (defined $cast) {

        my $credits;
        $cast =~ s/\s+/ /g;   # remove extra spaces
        $cast =~ s/\|\|/\|/g; # remove empty pipe-separated fields
        $cast =~ s/,,/,/g;    # remove empty comma-separated fields

        # First we check for 'character*actor' entries
        if ($cast =~ tr/*//) {
            my @castlist;
            # Multiple 'character*actor'entries
            if ($cast =~ tr/|//) {
                @castlist = split /\|/, $cast;
            }
            # Single 'character*actor' entry
            else {
                push @castlist, $cast;
            }

            # role debugging for non-actor role mapping
            my $seen_valid_roles = 0;
            my $seen_actor_roles = 0;

            my @crew = ();
            my @actors = ();

            # Now process the list of cast entries
            ENTRY:
            foreach my $entry (@castlist) {

                # Check for bad cast entries
                next ENTRY if ($entry !~ m/^[^*]+[*]/);

                # Populate cast list against known production roles if possible,
                # otherwise as character names for actors. We use a LUT to map
                # roles seen in the source listings to valid XMLTV roles.
                #
                # Typically we'll see either only actor credits (e.g. for
                # entertainment programmes) or crew credits (for non-fiction
                # programming). We look to see which type of credits we've
                # seen therefore before deciding whether to i) ignore unknown
                # roles, or ii) assign them as acting roles
                #
                my ($given_role, $name) = split /\*/, $entry;

                # Replace any actor given as Himself/Herself with the
                # character name given
                if ($given_role =~ m/^(Himself|Herself|Themselves)$/i) {
                    $given_role = $name;
                }

                $given_role = get_valid_xmltv_role($given_role);

                if (grep {$given_role =~ /^$_$/i} @valid_roles) {
                    t("    Found valid crew role: " . $given_role);
                    push @crew, [ $given_role, $name ];
                    # push @{$p->{credits}{$given_role}}, [$name];
                    $seen_valid_roles++;
                }
                elsif ($seen_valid_roles >= 1 && $seen_actor_roles == 0) {
                    # It's not a role that we currently handle, but we've
                    # seen other crew roles, so let's remember it
                    # t("    Found possible crew role: " . $given_role);
                    $seen_roles{$given_role}++;
                }
                else {
                    t("    Found possible actor role: " . $given_role . " - " . $name);
                    push @actors, [ $given_role, $name ];
                    # push @{$p->{credits}{'actor'}}, [$name, $given_role];
                    $seen_actor_roles++;
                }
            }

            # Prefer actors if we've seen 2 or fewer crew roles
            if ($seen_actor_roles >= 1 && $seen_valid_roles <=2) {
                foreach my $actor ((@actors, @crew)) {
                    push @{$p->{credits}{'actor'}}, [encode($xml_encoding, $actor->[1]), encode($xml_encoding, $actor->[0])];
                }
            }
            # Otherwise, prefer crew roles
            else {
                foreach my $actor (@crew) {
                    push @{$p->{credits}{$actor->[0]}}, encode($xml_encoding, $actor->[1]);
                }
            }
        }
        # Next we check for CSV-style actor entries
        elsif ($cast =~ tr/,//) {
            foreach my $actor (split /,/, $cast) {
                push @{$p->{credits}{actor}}, [ encode($xml_encoding, $actor) ];
            }
        }
        # Finally assume a single name that contains neither '*' nor ','
        else {
            $p->{credits}{actor} = [ encode($xml_encoding, $cast) ];
        }
    }
}

# Lookup a given credits role to see if it is a valid (or mapped-to-valid)
# role. Return the valid XMLTV role if we find one.
sub get_valid_xmltv_role {
    my $role = shift;
    if (exists $credits_role_map{lc $role}) {
        return $credits_role_map{lc $role};
    }
    return $role;
}

# Store details of uncategorised programmes, programmes having different
# genres throughout the listings, and films having a duration of less than
# 75 minutes for further analysis
sub store_genre_debug_info {
    my $prog = shift;

    if ($opt->{debug}) {
        if (defined $prog->{'_genre'} && ! $prog->{'_film'}) {
            $categories{$prog->{'_genre'}} = $prog->{'_genre'};
            if ($prog->{'_genre'} =~ m/^(No Genre)$/
                    && $prog->{'_title'} !~ m/^(To Be Announced|TBA|Close)$/) {
                $uncategorised_progs{$prog->{'_title'}} = $prog->{'_title'};
            }
            # Track programmes categorised as reality, but ignore any that we have explicitly set
            elsif ($prog->{'_genre'} =~ m/^Reality$/ && ! exists $replacement_cats{$prog->{'_title'}}) {
                $reality_progs{$prog->{'_title'}} = $prog->{'_title'};
            }
            $cats_per_prog{$prog->{'_title'}}{$prog->{'_genre'}}++;
        }
        elsif ($prog->{'_film'} and ($prog->{'_duration_mins'} < 75)) {
            $short_films{$prog->{'_title'}} = $prog->{'_title'};
        }
        elsif (! defined $prog->{'_genre'} && $prog->{'_title'} !~ m/^(To Be Announced|TBA|Close)$/) {
            $uncategorised_progs{$prog->{'_title'}} = $prog->{'_title'};
        }
    }
}


# Broadcast date, start/stop times, and timezone adjustments.
#
# For each programme entry, the Radio Times data includes the
# date at start of broadcast, the start time and the stop time.
#
# The Radio Times sometimes explicitly flags a programme's start/stop
# times as being in a specific timezone (GMT or BST). We parse this
# information out when processing the programme's title and apply it
# to the start time of any such programmes. Flagged programmes are
# seen in the data in March and October, when British Summer Times
# begins and ends.
#
# We calculate the programme's stop time using the
# UTC-offset-corrected start time and its stated length. This allows
# us to handle occasions when programmes having mixed GMT/BST
# timings are not flagged.
#
# The Summer Time Order of 2002 defines British Summer Time as
# "...the period beginning at one o'clock, Greenwich mean time, in
# the morning of the last Sunday in March and ending at one o'clock,
# Greenwich mean time, in the morning of the last Sunday in October."
sub generate_start_stop_times {
    my $prog = shift;
    my $ts_dt = shift;

    my ($dd, $mm, $yyyy)      = ($prog->{'_date'}  =~ m{(\d\d)/(\d\d)/(\d{4})});
    my ($start_hr, $start_mn) = ($prog->{'_start'} =~ m/(\d\d):(\d\d)/);
    t("    Start time given as '" . $yyyy . "/" . $mm . "/" . $dd . " "
            . $start_hr . ":" . $start_mn . "', duration " . $prog->{'_duration_mins'} . " mins");

    # Use explicit GMT/BST information if found in title
    my $tz = 'Europe/London';
    if (defined $prog->{'_explicit_tz'}) {
        t("    Explicit timezone '" . $prog->{'_explicit_tz'} . "' detected in title");
        if ($prog->{'_explicit_tz'} eq 'GMT') {
            t("    Forcing timezone to GMT/+0000");
            $tz = '+0000';
        }
        elsif ($prog->{'_explicit_tz'} eq 'BST') {
            t("    Forcing timezone to BST/+0100");
            $tz = '+0100';
        }
    }

    # Determine start time with correct UTC offset
    my $start_dt = DateTime->new(
        year       => $yyyy,
        month      => $mm,
        day        => $dd,
        hour       => $start_hr,
        minute     => $start_mn,
        second     => 0,
        time_zone  => $tz,
    );

    $prog->{start} = "" . $start_dt->ymd('') . $start_dt->hms('') . ' ' . DateTime::TimeZone->offset_as_string($start_dt->offset);
    t("    " . $prog->{start} . " - Start time");

    # Determine stop time with correct UTC offset by adding duration of
    # programme to start time
    my $dur = DateTime::Duration->new( minutes => $prog->{'_duration_mins'} );
    my $stop_dt = $start_dt + $dur;
    # Ensure correct timezone where TZ was explicitly flagged
    $stop_dt = $stop_dt->set_time_zone('Europe/London') if (defined $prog->{'_explicit_tz'});

    $prog->{stop}  = "" . $stop_dt->ymd('') . $stop_dt->hms('') . ' ' . DateTime::TimeZone->offset_as_string($stop_dt->offset);
    t("    " . $prog->{stop}  . " - Stop time");

    # Now we have determined the correct start/stop times for the programme
    # add any required timeshift defined in channel_ids and preserve the
    # correct timezone information
    #
    if (defined $channel_offset{ $prog->{channel} }) {
        $start_dt = $start_dt + $ts_dt;
        $stop_dt  = $stop_dt + $ts_dt;

        $prog->{start} = "" . $start_dt->ymd('') . $start_dt->hms('') . ' ' . DateTime::TimeZone->offset_as_string($start_dt->offset);
        $prog->{stop}  = "" . $stop_dt->ymd('')  . $stop_dt->hms('')  . ' ' . DateTime::TimeZone->offset_as_string($stop_dt->offset);

        t("    " . $prog->{start} . " - Start time after applying '" . $channel_offset{ $prog->{channel} } . "' timeshift");
        t("    " . $prog->{stop}  . " - Stop time after applying '"  . $channel_offset{ $prog->{channel} } . "' timeshift");
    }

    # Now check to see whether the channel broadcasting the programme is a
    # part-time channel, and if so, see whether this programme's timeslot
    # times fall within the broadcast window. If a channel broadcasts
    # through the night, we also need to test against the next day's
    # broadcast times.
    #
    # If the channel's timeshift is a multiple of 24hrs (e.g. Channel 5 +24)
    # we adjust the channel's start/stop times accordingly.
    #
    if (defined $broadcast_hours{ $prog->{channel} }) {
        $broadcast_hours{ $prog->{channel} } =~ m/(\d\d)(\d\d)-(\d\d)(\d\d)/;
        my ($chan_start_hr, $chan_start_mn, $chan_stop_hr, $chan_stop_mn) = ($1, $2, $3, $4);

        my $chan_start_dt = DateTime->new(
            year       => $yyyy,
            month      => $mm,
            day        => $dd,
            hour       => $chan_start_hr,
            minute     => $chan_start_mn,
            second     => 0,
            time_zone  => 'Europe/London',
        );

        my $chan_stop_dt = DateTime->new(
            year       => $yyyy,
            month      => $mm,
            day        => $dd,
            hour       => $chan_stop_hr,
            minute     => $chan_stop_mn,
            second     => 0,
            time_zone  => 'Europe/London',
        );

        # Shift channel start/stop times forward by whole days if necessary.
        if (defined $channel_offset{ $prog->{channel} } and $ts_dt->in_units( 'hours' ) % 24 == 0) {
            $chan_start_dt = $chan_start_dt + $ts_dt;
            $chan_stop_dt  = $chan_stop_dt  + $ts_dt;
            t("    Applying channel timeshift of " . $ts_dt->in_units( 'hours' ) . " hours");
        }

        # Correct the stop time if it is earlier than the start time
        my $chan_stops_next_day = 0;
        if ($chan_start_dt >= $chan_stop_dt) {
            $chan_stop_dt = $chan_stop_dt + $day_dur;
            $chan_stops_next_day = 1;
        }

        my $chan_start  = "" . $chan_start_dt->ymd('') . $chan_start_dt->hms('') . ' ' . DateTime::TimeZone->offset_as_string($chan_start_dt->offset);
        my $chan_stop  = "" . $chan_stop_dt->ymd('') . $chan_stop_dt->hms('') . ' ' . DateTime::TimeZone->offset_as_string($chan_stop_dt->offset);

        t("    " . $chan_start . " - Start time of channel (normal)");
        t("    " . $chan_stop  . " - Stop time of channel (normal)");

        # Include the current programme if its start time lies inside the
        # channel's broadcast window
        if ($start_dt >= $chan_start_dt && $start_dt < $chan_stop_dt) {
            t("    '" . $prog->{'_title'} . "' shown whilst channel is on-air, adding");
        }
        # If the channel starts and stops broadcasting on the same
        # calendar day and the programme's start time is outside the
        # broadcast window, skip it
        elsif ($chan_stops_next_day == 0 ) {
            if ($start_dt < $chan_start_dt) {
                t("    '" . $prog->{'_title'} . "' starts before channel has started, skipping\n");
                return undef;
            }
            elsif ($start_dt >= $chan_stop_dt) {
                t("    '" . $prog->{'_title'} . "' starts after channel has stopped, skipping\n");
                return undef;
            }
        }
        else {
            # If the channel broadcasts through the night, and the channel
            # start time is later than the stop time (i.e. 2300-0600), it is
            # possible for a program shown at or after midnight to result in
            # the generation of incorrect channel start/stop times (shifted
            # +1day forward). We therefore generate another pair of channel
            # start/stop times for the previous day to match against.
            #
            # Example: consider a 30min programme broadcast on 20120101 at 00:30
            # and we're comparing it to a channel that broadcasts between
            # 2300 and 0600. We generate start/stop times of 201201012300 and
            # 201201020600 for the channel, but the programme starts/stops
            # at 201201010030 and 201201010100. These times occur whilst the
            # channel is on-air the _previous_ day.
            #
            $chan_start_dt = $chan_start_dt - $day_dur;
            $chan_stop_dt  = $chan_stop_dt - $day_dur;

            my $chan_start = "" . $chan_start_dt->ymd('') . $chan_start_dt->hms('') . ' ' . DateTime::TimeZone->offset_as_string($chan_start_dt->offset);
            my $chan_stop  = "" . $chan_stop_dt->ymd('')  . $chan_stop_dt->hms('')  . ' ' . DateTime::TimeZone->offset_as_string($chan_stop_dt->offset);

            t("    " . $chan_start . " - Start time of channel (previous day)");
            t("    " . $chan_stop  . " - Stop time of channel (previous day)");

            # Test again to see if the programme falls between the adjusted
            # channel broadcast times
            if ($start_dt >= $chan_start_dt && $start_dt < $chan_stop_dt) {
                t("    '" . $prog->{'_title'} . "' shown whilst channel is on-air, adding");
            } else {
                t("    '" . $prog->{'_title'} . "' shown whilst channel is off-air, skipping\n");
                return undef;
            }
        }
    }
    return [ $prog->{start}, $prog->{stop} ];
}

# Remove non-title text found in programme title. This text is placed at the
# start of the 'real' title, separated from it by a colon.
#
# Text to try and match against the programme title is stored in a hash of arrays
# to shortcut the list of possible matches to those beginning with the same
# first character as the title. It would seem to be quicker to use a regex
# to match some amount of text up to colon character in the programme title,
# and then use a hash lookup against the matched text. However, there is no
# limit to the number of colons in the text to remove, so this approach cannot
# be used. NOTE: the method is used for several of the title consistency
# routines in order to speed up processing.
#
sub process_non_title_info {
    my $prog = shift;

    if ($have_title_data && %non_title_info && $prog->{'_title'} =~ m/:/) {
        my $idx_char = lc(substr $prog->{'_title'}, 0, 1);
        NON_TITLE_TEXT:
        foreach my $non_title_info (@{$non_title_info{$idx_char}}) {
            if ($prog->{'_title'} =~ s/^(\Q$non_title_info\E)\s*:\s*//i) {
                t("      Removed '" . $non_title_info
                  . "' from title. New title '" . $prog->{'_title'} . "'");
                last NON_TITLE_TEXT;
            }
        }
    }
}

# Promote demoted title from subtitle field to title field, replacing whatever
# text is in the title field at the time. If the demoted title if followed by
# a colon and the subtitle text, that is preserved in the subtitle field.
#
# A title can be demoted to the subtitle field if the programme's "brand"
# is present in the title field, as can happen with data output from Atlas.
#
# Text to try and match against the programme subtitle is stored in a hash of arrays
# to shortcut the list of possible matches to those beginning with the same
# first character as the title (as with process_non_title_info() ).
#
sub process_demoted_titles {
    my $prog = shift;
    my $brand = $prog->{'_title'};

    if ($have_title_data && %demoted_title && defined $prog->{'_episode'}
                         && defined $demoted_title{ $brand }) {
        DEMOTED_TITLE:
        foreach my $demoted_title (@{$demoted_title{ $brand }}) {
            my $new_ep;
            if ($prog->{'_episode'} =~ m/^\Q$demoted_title\E$/i) {
                $new_ep = '';
            }
            elsif ($prog->{'_episode'} =~ m/^\Q$demoted_title\E(?::|\s-)\s(.*)$/i) {
                $new_ep = $1;
            }
            else {
                next DEMOTED_TITLE;
            }

            $prog->{'_title'} = $demoted_title;
            $prog->{'_episode'} = $new_ep;
            t("      Promoted title '" . $demoted_title . "' from subtitle for brand '"
                    . $brand . "'. New subtitle '" . $prog->{'_episode'} . "'");
            $prog->{'_titles_processed'} = 1;
            $prog->{'_subtitles_processed'} = 1;
            last DEMOTED_TITLE;
        }
    }
}

# Allow arbitrary replacement of one title/episode pair with another, based
# on a given description.
#
# Intended to be used where previous title/episode replacement routines
# do not allow a specific enough correction to the listings data (i.e. for
# one-off changes).
#
# *** THIS MUST BE USED WITH CARE! ***
#
sub process_replacement_titles_desc {
    my $prog = shift;

    if ($have_title_data && %replacement_title_desc && defined $prog->{'_desc'} ) {
        my $tmp_ep;
        my $tmp_ep_num;
        my $tmp_ep_num_text = '';
        # Handle potential undef episode value, as the empty string
        # was used in place of an undef episode during concatenation
        # in the replacement hash
        if (not defined $prog->{'_episode'}) {
            $tmp_ep = '';
        }
        # Also handle an episode number that may be present in source
        # data but not in replacement text
        elsif ($prog->{'_episode'} =~ m/^\d+\s*(?:&\d+)?\s*\/\s*\d+\s*(?:\.|\/|,|;|:)?\s*-?\s*(?:series\s*(\w+)\s*(?:;|\.)?)?\s*$/) {
            $tmp_ep = '';
            $tmp_ep_num = $prog->{'_episode'};
            $tmp_ep_num_text = " (Preserving existing numbering)";
        }
        else {
            $tmp_ep = $prog->{'_episode'};
        }
        my $key = "" . $prog->{'_title'} . "|" . $tmp_ep . "|" . $prog->{'_desc'};
        # Check whether we have matched the old programme title/episode/desc combo
        if (defined $replacement_title_desc{$key}) {
            # Now replace the old title/ep values with new ones
            my ($old_title, $old_ep) = ($prog->{'_title'}, $tmp_ep);
            my ($new_title, $new_ep) = @{$replacement_title_desc{$key}};
            # update the title
            $prog->{'_title'} = $new_title;
            # if new episode value is empty string, replace with undef;
            # otherwise use new value
            if ($new_ep eq '') {
                if (defined $tmp_ep_num) {
                    $prog->{'_episode'} = $tmp_ep_num;
                }
                else {
                    $prog->{'_episode'} = undef;
                }
            }
            else {
                if (defined $tmp_ep_num) {
                    $prog->{'_episode'} = $tmp_ep_num . ": " . $new_ep;
                }
                else {
                    $prog->{'_episode'} = $new_ep;
                }
            }
            t("      Replaced old title/ep '" . $old_title . " / " . $old_ep
                . "' with new title/ep '" . $new_title . " / " . $new_ep
                . "' using desc" . $tmp_ep_num_text);
            $prog->{'_titles_processed'} = 1;
        }
    }
}

# Allow arbitrary replacement of one title/episode pair with another.
# Intended to be used where previous title/episode replacement routines
# do not allow the desired correction (i.e. for one-off changes).
#
# *** THIS MUST BE USED WITH CARE! ***
#
sub process_replacement_titles_episodes {
    my $prog = shift;

    if ($have_title_data && %replacement_title_eps) {
        my $tmp_ep;
        my $tmp_ep_num;
        my $tmp_ep_num_text = '';
        # Handle potential undef episode value, as the empty string
        # was used in place of an undef episode during concatenation
        # in the replacement hash
        if (not defined $prog->{'_episode'}) {
            $tmp_ep = '';
        }
        # Also handle an episode number that may be present in source
        # data but not in replacement text
        elsif ($prog->{'_episode'} =~ m/^\d+\s*(?:&\d+)?\s*\/\s*\d+\s*(?:\.|\/|,|;|:)?\s*-?\s*(?:series\s*(\w+)\s*(?:;|\.)?)?\s*$/) {
            $tmp_ep = '';
            $tmp_ep_num = $prog->{'_episode'};
            $tmp_ep_num_text = " (Preserving existing numbering)";
        }
        else {
            $tmp_ep = $prog->{'_episode'};
        }
        my $key = "" . $prog->{'_title'} . "|" . $tmp_ep;
        # Check whether we have matched the old programme title/episode combo
        if (defined $replacement_title_eps{$key}) {
            # Now replace the old title/ep values with new ones
            my ($old_title, $old_ep) = ($prog->{'_title'}, $tmp_ep);
            my ($new_title, $new_ep) = @{$replacement_title_eps{$key}};
            # update the title
            $prog->{'_title'} = $new_title;
            # if new episode value is empty string, replace with undef;
            # otherwise use new value
            if ($new_ep eq '') {
                if (defined $tmp_ep_num) {
                    $prog->{'_episode'} = $tmp_ep_num;
                }
                else {
                    $prog->{'_episode'} = undef;
                }
            }
            else {
                if (defined $tmp_ep_num) {
                    $prog->{'_episode'} = $tmp_ep_num . ": " . $new_ep;
                }
                else {
                    $prog->{'_episode'} = $new_ep;
                }
            }
            t("      Replaced old title/ep '" . $old_title . " / " . $old_ep
                . "' with new title/ep '" . $new_title . " / " . $new_ep
                . "'" . $tmp_ep_num_text);
            $prog->{'_titles_processed'} = 1;
        }
    }
}

# Some given programme titles contain both the title and episode data,
# separated by a colon ($title:$episode) or a hyphen ($title - $episode).
# Here we reassign the episode to the $episode element, leaving only the
# programme's title in the $title element
#
sub process_mixed_title_subtitle {
    my $prog = shift;

    if ($have_title_data && %mixed_title_subtitle && $prog->{'_title'} =~ m/:|-/) {
        my $idx_char = lc(substr $prog->{'_title'}, 0, 1);
        MIXED_TITLE_SUBTITLE:
        foreach my $mixed_title_subtitle (@{$mixed_title_subtitle{$idx_char}}) {
            if ($prog->{'_title'} =~ m/^(\Q$mixed_title_subtitle\E)\s*(?::|-)\s*(.*)/) {
                # store the captured text
                my $new_title = $1;
                my $new_episode = $2;
                $prog->{'_titles_processed'} = 1;
                if (! defined $prog->{'_episode'}) {
                    t("      Moved '" . $new_episode . "' to sub-title,"
                      . " new title is '" . $new_title . "'");
                    $prog->{'_title'} = $new_title;
                    $prog->{'_episode'} = $new_episode;
                    last MIXED_TITLE_SUBTITLE;
                }
                elsif ($prog->{'_episode'} eq $new_episode) {
                    t("      Sub-title '" . $new_episode . "' seen in "
                      . "title already exists, new title is '"
                      . $new_title . "'");
                    $prog->{'_title'} = $new_title;
                    last MIXED_TITLE_SUBTITLE;
                }
                # concat subtitle after any episode numbering (x/y)
                elsif ($prog->{'_episode'} =~ m/^\d+\s*(?:&\d+)?\s*\/\s*\d+\s*(?:\.|\/|,|;|:)?\s*-?\s*(?:series\s*(\w+)\s*(?:;|\.)?)?\s*$/) {
                    t("      Concatenating sub-title '" . $new_episode
                      . "' seen in title after existing episode numbering '"
                      . $prog->{'_episode'} . "'");
                    $prog->{'_title'} = $new_title;
                    $prog->{'_episode'} = $prog->{'_episode'} . ": " . $new_episode;
                    last MIXED_TITLE_SUBTITLE;
                }
                else {
                    t("      Concatenating sub-title '" . $new_episode
                      . "' seen in title with existing episode info '"
                      . $prog->{'_episode'} . "'");
                    $prog->{'_title'} = $new_title;
                    $prog->{'_episode'} = $new_episode . ": " . $prog->{'_episode'};
                    last MIXED_TITLE_SUBTITLE;
                }
            }
        }
    }
}

# Some given programme titles contain both the episode and title data,
# separated by a colon ($episode:$title) or a hyphen ($episode - $title).
# Here we reassign the episode to the $episode element, leaving only the
# programme's title in the $title element
#
sub process_mixed_subtitle_title {
    my $prog = shift;

    if ($have_title_data && @mixed_subtitle_title && $prog->{'_title'} =~ m/:|-/) {
        MIXED_SUBTITLE_TITLE:
        foreach my $mixed_subtitle_title (@mixed_subtitle_title) {
            if ($prog->{'_title'} =~ m/^(.*)\s*(?::|-)\s*(\Q$mixed_subtitle_title\E)/) {
                # store the captured text
                my $new_title = $2;
                my $new_episode = $1;
                $prog->{'_titles_processed'} = 1;
                if (! defined $prog->{'_episode'}) {
                    t("      Moved '" . $new_episode . "' to sub-title, "
                      . "new title is '" . $new_title . "'");
                    $prog->{'_title'} = $new_title;
                    $prog->{'_episode'} = $new_episode;
                    last MIXED_SUBTITLE_TITLE;
                }
                elsif ($prog->{'_episode'} eq $new_episode) {
                    t("      Identical sub-title '" . $new_episode
                      . "' also seen in title, new title is '"
                      . $new_title . "'");
                    $prog->{'_title'} = $new_title;
                    last MIXED_SUBTITLE_TITLE;
                }
                # concat subtitle after any episode numbering (x/y)
                elsif ($prog->{'_episode'} =~ m/^\d+\s*(?:&\d+)?\s*\/\s*\d+\s*(?:\.|\/|,|;|:)?\s*-?\s*(?:series\s*(\w+)\s*(?:;|\.)?)?\s*$/) {
                    t("      Concatenating sub-title '" . $new_episode
                      . "' seen in title after existing episode numbering '"
                      . $prog->{'_episode'} . "'");
                    $prog->{'_title'} = $new_title;
                    $prog->{'_episode'} = $prog->{'_episode'} . ": " . $new_episode;
                    last MIXED_SUBTITLE_TITLE;
                }
                else {
                    t("      Concatenating sub-title '" . $new_episode
                      . "' seen in title with existing episode info '"
                      . $prog->{'_episode'} . "'");
                    $prog->{'_title'} = $new_title;
                    $prog->{'_episode'} = $new_episode . ": " . $prog->{'_episode'};
                    last MIXED_SUBTITLE_TITLE;
                }
            }
        }
    }
}

# Listings for some programmes may have reversed title and sub-title information
# ($title = 'real' episode and $episode = 'real' title. In order to create more
# consistent data, we check for flagged programme titles and reverse the given
# title and sub-title when found.
#
sub process_reversed_title_subtitle {
    my $prog = shift;

    if ($have_title_data && %reversed_title_subtitle && defined $prog->{'_episode'}) {
        my $idx_char = lc(substr $prog->{'_title'}, 0, 1);
        REVERSED_TITLE_SUBTITLE:
        foreach my $reversed_title_subtitle (@{$reversed_title_subtitle{$idx_char}}) {
            if ($reversed_title_subtitle eq $prog->{'_episode'}) {
                t("      Seen reversed title-subtitle for '"
                  . $prog->{'_title'} . ":" . $prog->{'_episode'} . "' - reversing" );
                $prog->{'_episode'} = $prog->{'_title'};
                $prog->{'_title'} = $reversed_title_subtitle;
                t("      New title is '" . $prog->{'_title'} . "' and new "
                  . "sub-title is '" . $prog->{'_episode'} . "'");
                $prog->{'_titles_processed'} = 1;
                last REVERSED_TITLE_SUBTITLE;
            }
        }
    }
}

# Process inconsistent titles, replacing any flagged bad titles with good
# titles. A straightforward hash lookup against the programme title is used.
#
sub process_replacement_titles {
    my $prog = shift;
    if ($have_title_data && %replacement_titles) {
        my $bad_title = $prog->{'_title'};
        if (defined $replacement_titles{$bad_title}) {
            $prog->{'_title'} = $replacement_titles{$bad_title};
            t("      Replaced title '" . $bad_title . "' with '" . $prog->{'_title'} . "'");
            $prog->{'_titles_processed'} = 1;
        }
    }
}

# Process inconsistent episodes. The %replacement_episodes data structure
# is a hash of hashes.
#
sub process_replacement_episodes {
    my $prog = shift;

    if ($have_title_data && %replacement_episodes && defined $prog->{'_episode'}) {
        my $bad_episode_title = $prog->{'_title'};
        my $bad_episode = $prog->{'_episode'};
        # First, check whether we have matched the programme title
        if (defined $replacement_episodes{$bad_episode_title}) {
            # Now look for a specific episode match for the title
            if (defined $replacement_episodes{$bad_episode_title}->{$bad_episode}) {
                $prog->{'_episode'} = $replacement_episodes{$bad_episode_title}->{$bad_episode};
                t("      Replaced episode info '" . $bad_episode . "' with '" . $prog->{'_episode'} . "'");
                $prog->{'_subtitles_processed'} = 1;
            }
        }
    }
}

# Process text to remove from subtitles. The %subtitle_remove_text data structure
# is a hash of arrays.
#
sub process_subtitle_remove_text {
    my $prog = shift;

    if ($have_title_data && %subtitle_remove_text && defined $prog->{'_episode'}) {
        my $title = $prog->{'_title'};
        my $episode = $prog->{'_episode'};
        if ($subtitle_remove_text{$title}) {
            REMOVE_TEXT:
            foreach my $remove_text (sort @{$subtitle_remove_text{$title}}) {
                if ($prog->{'_episode'} =~ m/^(\Q$remove_text\E)(?:\s*:|\s*-|\s+)\s*(.*)$/) {
                    $prog->{'_episode'} = $2;
                    t("      Removed text '" . $remove_text . "' from subtitle for title '" . $prog->{'_title'} . "'");
                    $prog->{'_subtitles_processed'} = 1;
                    last REMOVE_TEXT;
                }
                # We need a non-greedy match at the start of the subtitle
                elsif ($prog->{'_episode'} =~ m/^(.*?)\s*(?::|-)?\s*(\Q$remove_text\E)$/) {
                    $prog->{'_episode'} = $1;
                    t("      Removed text '" . $remove_text . "' from subtitle for title '" . $prog->{'_title'} . "'");
                    $prog->{'_subtitles_processed'} = 1;
                    last REMOVE_TEXT;
                }
            }
        }
    }
}

# Replace an inconsistent or missing episode subtitle based a given description.
# The description should therefore be unique for each episode of the programme.
# The %replacement_ep_from_desc data structure is a hash of hashes.
#
sub process_replacement_ep_from_desc {
    my $prog = shift;

    if ($have_title_data && %replacement_ep_from_desc && defined $prog->{'_desc'}) {
        my $bad_episode_title = $prog->{'_title'};
        my $bad_ep_desc = $prog->{'_desc'};
        # First, check whether we have matched the programme title
        if (defined $replacement_ep_from_desc{$bad_episode_title}) {
            # Now look for a specific desc match for the title
            if (defined $replacement_ep_from_desc{$bad_episode_title}->{$bad_ep_desc}) {
                my $old_ep;
                (defined $prog->{'_episode'}) ? ($old_ep = $prog->{'_episode'}) : ($old_ep = '');
                $prog->{'_episode'} = $replacement_ep_from_desc{$bad_episode_title}->{$bad_ep_desc};
                t("      Updated episode from '" . $old_ep . "' to '" . $prog->{'_episode'}
                    . "' for title '" . $bad_episode_title . "', based on desc '");
                $prog->{'_subtitles_processed'} = 1;
            }
        }
    }
}

# Process programmes that may not be categorised, or are categorised with
# various categories in the source data. Untitled programmes ("To Be Announced")
# are ignored, and films are handled separately. Different programmes with
# identical titles should not be replaced using this routine as it may cause
# such programmes to be given inaccurate genres.
#
sub process_replacement_genres {
    my $prog = shift;

    if ($have_title_data && %replacement_cats && $prog->{'_title'} !~ m/^(To Be Announced|TBA)/i && ! $prog->{'_film'}) {
        if (defined $replacement_cats{$prog->{'_title'}}) {
            $prog->{'_genre'} = $replacement_cats{$prog->{'_title'}};
            t("      Assigned title '" . $prog->{'_title'} . "' to category '" . $prog->{'_genre'} . "'");
        }
    }
    elsif ($have_title_data && %replacement_cats_film && $prog->{'_title'} !~ m/^(To Be Announced|TBA)/i && $prog->{'_film'}) {
        if (defined $replacement_cats_film{$prog->{'_title'}}) {
            $prog->{'_genre'} = $replacement_cats_film{$prog->{'_title'}};
            $prog->{'_film'} = 0;
            delete $prog->{'_year'};
            t("      Re-assigned film '" . $prog->{'_title'} . "' to category '" . $prog->{'_genre'} . "'");
        }
    }
}

# Extract series/episode numbering found in $prog->{'_episode'}. Series
# and episode numbering are parsed out of the text and eventually made
# available in the <episode-num> element, being stored in intermediary
# variables during processing as when parsing the $prog->{'_sub_title'}.
# With most numbering being parsed out of $prog->{'_sub_title'} directly
# from the source data, this routine will extract most numbering inserted
# through the title/episode update/consistency routines.
#
sub extract_numbering_from_episode {
    my $prog = shift;

    if (defined $prog->{'_episode'}
             && $prog->{'_episode'} =~ m/\d+|episode/i) {

        # ) check for "x/y" format covering following formats
        #
        # "1/6 - ..."
        # "1/6, series 1 - ..."
        # "1, series 1 - ..."
        # "1/6, series one - "...
        # "1, series one - ..."
        #
        if ($prog->{'_episode'} =~
            s{
              ^                       # start at beginning of episode details
              (\d+)                   # CAPTURE the first number(s) found ($episode_num)
              \s*                     # ignore any whitespace
              \/?                     # forward slash
              \s*                     # ignore any whitespace
              (\d+)?                  # CAPTURE the second number(s) found ($num_episodes)
              \s*                     # ignore any whitespace
              (?:,)?                  # check for punctuation characters
              \s*                     # ignore any whitespace
              (?:series\s*(\w+|\d+))? # check for series number information ($series_num)
              \s*                     # ignore any whitespace
              (?:-)                   # hyphen to separate numbering from episode text
              \s*                     # ignore any whitespace
            }
            {}ix ) {
                $prog->{'_episode_num'} = $1 - 1;
                # Check that source episode number is not greater than number of episodes
                # Rather than discard the episode number, we discard the total instead which
                # is more likely to be incorrect based on observation.
                if (defined $2) {
                    if ($1 <= $2) {
                        $prog->{'_num_episodes'} = $2;
                        t("    Episode number/total found: episode $1 of $2 (subtitle, x/y)");
                    }
                    else {
                        t("    Bad episode total found: episode $1 of $2, discarding total (subtitle, x/y)");
                    }
                }
                else {
                    t("    Episode number found: episode $1 (episode, x/y)");
                }
                if (defined $3) {
                    my $digits = word_to_digit($3);
                    if (defined $digits and $digits > 0) {
                        t("    Series number found: series $digits (parsed as $3, episode, x/y)");
                        $prog->{'_series_num'} = $digits - 1;
                    }
                }
                $prog->{'_processed'} = 1;
        }

        # ) check for "Episode x" format covering following formats:
        #
        # "Episode 1"
        # "Episode one"
        #
        elsif ($prog->{'_episode'} =~
            s{
              ^                      # start at beginning of episode details
              (?:Episode|Ep)         # ignore "Episode" text
              \s*                    # ignore any whitespace
              (\w+|\d+)              # CAPTURE the first number(s) found ($episode_num)
              $                      # finish at end of episode details
            }
            {}ix ) {
                my $digits = word_to_digit($1);
                if (defined $digits and $digits > 0) {
                    t("    Episode number found: episode $digits (parsed as $1, episode, episode x)");
                    $prog->{'_episode_num'} = $digits - 1;
                }
                $prog->{'_processed'} = 1;
        }
    }
}

# Check for potential season numbering in title
#
sub extract_numbering_from_title {
    my $prog = shift;

    if ($prog->{'_title'} =~ m/Series|Season/i) {

        # this regex looks for season numbering in title with
        # in parentheses
        #
        # "Wheeler Dealers - (Series 1)"
        # "Wheeler Dealers (Season 1)"
        if ($prog->{'_title'} =~
            m{
              ^                      # start at beginning of title details
              (.*\b[\x21-\x2F\x3F]?) # CAPTURE the title details before season numbering
              \s*                    # ignore any whitespace
              (?:,|;|:|-)?           # check for optional punctuation characters
              \s*                    # ignore any whitespace
              (?:\()                 # opening paren
              (?:Series|Season)      # check for Part/Pt text
              \s*                    # ignore any whitespace
              (\d+)                  # CAPTURE season number
              (?:,)?                 # ignore comma if present
              \s*                    # ignore any whitespace
              (\d+)?                 # CAPTURE episode number if present
              \s*                    # ignore any whitespace
              (?:\))                 # closing paren
              $                      # finish at end of title details
             }ix )
        {
                    $prog->{'_title'} = $1;
                    if (defined $prog->{'_series_num'} && $prog->{'_series_num'} != $2) {
                        t("    Season number (" . $prog->{'_series_num'} . ") already defined. "
                            . "Ignoring different season number (" . $2 . ") in title.");
                    }
                    else {
                        t("    Season number found: Season $2 (title regex)");
                        $prog->{'_series_num'} = $2 - 1;
                    }
                    $prog->{'_episode_num'} = $3 - 1 if $3;
                    $prog->{'_processed'} = 1;
        }

        # this regex looks for season numbering in title without
        # parentheses
        #
        # "Wheeler Dealers Series 1"
        # "Wheeler Dealers Series 1, 3"
        elsif ($prog->{'_title'} =~
            m{
              ^                      # start at beginning of title details
              (.*\b[\x21-\x2F\x3F]?) # CAPTURE the title details before season numbering
              \s*                    # ignore any whitespace
              (?:,|;|:|-)?           # check for optional punctuation characters
              \s*                    # ignore any whitespace
              (?:Season|Series)      # check for Part/Pt text
              \s*                    # ignore any whitespace
              (\d+)                  # CAPTURE season number
              (?:,)?                 # ignore comma if present
              \s*                    # ignore any whitespace
              (\d+)?                 # CAPTURE episode number if present
              \s*                    # ignore any whitespace
              $                      # finish at end of title details
             }ix )
        {
                    $prog->{'_title'} = $1;
                    if (defined $prog->{'_series_num'} && $prog->{'_series_num'} != $2) {
                        t("    Season number (" . $prog->{'_series_num'} . ") already defined. "
                            . "Ignoring different season number (" . $2 . ") in title.");
                    }
                    else {
                        t("    Season number found: Season $2 (title regex)");
                        $prog->{'_series_num'} = $2 - 1;
                    }
                    $prog->{'_episode_num'} = $3 - 1 if $3;
                    $prog->{'_processed'} = 1;
        }
    }
}

# Part numbering is parsed but unused. However, when part numbering is
# seen in the text it is processed to make its format consistent.
#
# FIXME should we export part number in <episode-num> and remove
# it from the text?
#
sub extract_part_numbering_from_episode {
    my $prog = shift;

    if (defined $prog->{'_episode'}
             && $prog->{'_episode'} =~ m/Part|Pt|\d\s*$/i) {

        # this regex looks for part numbering in parentheses
        #
        # "Dead Man's Eleven (Part 1)"
        # "Dead Man's Eleven - (Part 1)"
        # "Dead Man's Eleven - (Part 1/2)"
        # "Dead Man's Eleven (Pt 1)"
        # "Dead Man's Eleven - (Pt. 1)"
        # "Dead Man's Eleven - (Pt. 1/2)"
        if ($prog->{'_episode'} =~
            m{
              ^                      # start at beginning of episode details
              (.*\b[\x21-\x2F\x3F]?) # CAPTURE the episode details before part numbering
              \s*                    # ignore any whitespace
              (?:,|;|:|-)?           # check for optional punctuation characters
              \s*                    # ignore any whitespace
              (?:\()                 # opening paren
              (?:Part|Pt(?:\.)?)     # check for Part/Pt text
              \s*                    # ignore any whitespace
              (\d+)                  # CAPTURE part number
              \s*                    # ignore any whitespace
              (?:\/\s*\d+)?          # ignore any total part number
              (?:\))                 # closing paren
              $                      # finish at end of episode details
             }ix )
        {
                    t("    Part number found: part $2 (regex #1)");
                    $prog->{'_episode'} = $1 . " (Part " . $2 . ")";
                    $prog->{'_part_num'} = $2 - 1;
                    $prog->{'_processed'} = 1;
        }

        # this regex looks for part numbering with no other episode information
        #
        # "Part 1"
        # "Part 1/3"
        # "Pt 2"
        # "Pt 2/3"
        # "Pt. 3"
        elsif ($prog->{'_episode'} =~
            m{
              ^                    # start at beginning of episode details
              (?:Part|Pt(?:\.)?)   # check for Part/Pt text
              \s*                  # ignore any whitespace
              (\d+)                # CAPTURE part number
              \s*                  # ignore any whitespace
              (?:\/\s*\d+)?        # ignore any total part number
              $                    # finish at end of episode details
             }ix )
        {
                    t("    Part number found: part $1 (regex #2)");
                    $prog->{'_episode'} = "Part " . $1;
                    $prog->{'_part_num'} = $1 - 1;
                    $prog->{'_processed'} = 1;
        }

        # this regex looks for bare part numbering after a comma, semicolon,
        # colon or hyphen
        #
        # "Dead Man's Eleven - Part 1"
        # "Dead Man's Eleven: Part 1"
        # "Dead Man's Eleven; Pt 1"
        # "Dead Man's Eleven, Pt. 1"
        elsif ($prog->{'_episode'} =~
            m{
              ^                      # start at beginning of episode details
              (.*\b[\x21-\x2F\x3F]?) # CAPTURE the episode details before part numbering
              \s*                    # ignore any whitespace
              (?:,|;|:|-)            # punctuation characters
              \s*                    # ignore any whitespace
              (?:Part|Pt(?:\.)?)     # check for Part/Pt text
              \s*                    # ignore any whitespace
              (\d+)                  # CAPTURE part number
              \s*                    # ignore any whitespace
              (?:\/\s*\d+)?          # ignore any total part number
              $                      # finish at end of episode details
             }ix )
        {
                    t("    Part number found: part $2 (regex #3)");
                    $prog->{'_episode'} = $1 . " (Part " . $2 . ")";
                    $prog->{'_part_num'} = $2 - 1;
                    $prog->{'_processed'} = 1;
        }

        # this regex looks for part numbering immediately following episode info
        #
        # "Dead Man's Eleven Part 1"
        # "Dead Man's Eleven Pt 1"
        # "Dead Man's Eleven Pt 1/2"
        # "Dead Man's Eleven Pt. 1"
        elsif ($prog->{'_episode'} =~
            m{
              ^                      # start at beginning of episode details
              (.*\b[\x21-\x2F\x3F]?) # CAPTURE the episode details before part numbering
              \s*                    # ignore any whitespace
              (?:Part|Pt(?:\.)?)     # check for Part/Pt text
              \s*                    # ignore any whitespace
              (\d+)                  # CAPTURE part number
              \s*                    # ignore any whitespace
              (?:\/\s*\d+)?          # ignore any total part number
              $                      # finish at end of episode details
             }ix )
        {
                    t("    Part number found: part $2 (regex #4)");
                    $prog->{'_episode'} = $1 . " (Part " . $2 . ")";
                    $prog->{'_part_num'} = $2 - 1;
                    $prog->{'_processed'} = 1;
        }

        # this regex looks for a digit (conservatively between 1 and 6) following
        # the episode details, a colon and at least one space
        #
        # "Dead Man's Eleven: 1"
        elsif ($prog->{'_episode'} =~
            m{
              ^                    # start at beginning of episode details
              (.*)                 # CAPTURE the episode details before part numbering
              \s*                  # ignore any whitespace
              (?::)                # colon
              \s+                  # ignore any whitespace - min 1 space
              (\d{1})              # CAPTURE single digit part number between 1 and 6
              $                    # finish at end of episode details
             }ix )
        {
                    if ($2 ge 1 && $2 le 6) {
                        t("    Part number found: part $2 (regex #5, range 1-6)");
                        $prog->{'_episode'} = $1 . " (Part " . $2 . ")";
                        $prog->{'_part_num'} = $2 - 1;
                        $prog->{'_processed'} = 1;
                    }
        }

        # this regex looks for worded part numbering with no other episode information
        #
        # "Part One"
        # "Pt Two"
        # "Pt. Three"
        elsif ($prog->{'_episode'} =~
            m{
              ^                    # start at beginning of episode details
              (?:Part|Pt(?:\.)?)   # check for Part/Pt text
              \s+                  # ignore any whitespace
              (\w+)                # CAPTURE part number wording
              $                    # finish at end of episode details
             }ix )
        {
                    my $part_digits = word_to_digit($1);
                    if (defined $part_digits and $part_digits > 0) {
                        t("    Part number found: part $part_digits (regex #6, parsed as $1)");
                        $prog->{'_episode'} = "Part " . $part_digits;
                        $prog->{'_part_num'} = $part_digits - 1;
                        $prog->{'_processed'} = 1;
                    }
        }

        # this regex looks for bare part numbering after a comma, semicolon,
        # colon or hyphen, where the numbering is given in words
        #
        # "Dead Man's Eleven - Part One"
        # "Dead Man's Eleven: Part One"
        # "Dead Man's Eleven; Pt One"
        # "Dead Man's Eleven, Pt. One"
        elsif ($prog->{'_episode'} =~
            m{
              ^                      # start at beginning of episode details
              (.*\b[\x21-\x2F\x3F]?) # CAPTURE the episode details before part numbering
              \s*                    # ignore any whitespace
              (?:,|;|:|-)            # punctuation characters
              \s*                    # ignore any whitespace
              (?:Part|Pt(?:\.)?)     # check for Part/Pt text
              \s+                    # ignore any whitespace
              (\w+)                  # CAPTURE part number wording
              $                      # finish at end of episode details
             }ix )
        {
                    my $part_digits = word_to_digit($2);
                    if (defined $part_digits and $part_digits > 0) {
                        t("    Part number found: part $part_digits (regex #7, parsed as $2)");
                        $prog->{'_episode'} = $1 . " (Part " . $part_digits . ")";
                        $prog->{'_part_num'} = $part_digits - 1;
                        $prog->{'_processed'} = 1;
                    }
        }

        # this regex looks for worded part numbering immediately following episode info
        #
        # "Dead Man's Eleven Part One"
        # "Dead Man's Eleven Pt One"
        # "Dead Man's Eleven Pt. One"
        elsif ($prog->{'_episode'} =~
            m{
              ^                      # start at beginning of episode details
              (.*\b[\x21-\x2F\x3F]?) # CAPTURE the episode details before part numbering
              \s*                    # ignore any whitespace
              (?:Part|Pt(?:\.)?)     # check for Part/Pt text
              \s*                    # ignore any whitespace
              (\w+)                  # CAPTURE part number wording
              $                      # finish at end of episode details
             }ix )
        {
                    my $part_digits = word_to_digit($2);
                    if (defined $part_digits and $part_digits > 0) {
                        t("    Part number found: part $part_digits (regex #8, parsed as $2)");
                        $prog->{'_episode'} = $1 . " (Part " . $part_digits . ")";
                        $prog->{'_part_num'} = $part_digits - 1;
                        $prog->{'_processed'} = 1;
                    }
        }

        # this regex looks for worded part numbering in parentheses
        #
        # "Dead Man's Eleven (Part One)"
        # "Dead Man's Eleven - (Part One)"
        # "Dead Man's Eleven (Pt One)"
        # "Dead Man's Eleven - (Pt. One)"
        elsif ($prog->{'_episode'} =~
            m{
              ^                      # start at beginning of episode details
              (.*\b[\x21-\x2F\x3F]?) # CAPTURE the episode details before part numbering
              \s*                    # ignore any whitespace
              (?:,|;|:|-)?           # check for optional punctuation characters
              \s*                    # ignore any whitespace
              (?:\()                 # opening paren
              (?:Part|Pt(?:\.)?)     # check for Part/Pt text
              \s*                    # ignore any whitespace
              (\w+)                  # CAPTURE part number wording
              \s*                    # ignore any whitespace
              (?:\))                 # closing paren
              $                      # finish at end of episode details
             }ix )
        {
                    my $part_digits = word_to_digit($2);
                    if (defined $part_digits and $part_digits > 0) {
                        t("    Part number found: part $part_digits (regex #9, parsed as $2)");
                        $prog->{'_episode'} = $1 . " (Part " . $part_digits . ")";
                        $prog->{'_part_num'} = $part_digits - 1;
                        $prog->{'_processed'} = 1;
                    }
        }

        # check for potential part numbering left unprocessed
        #
        # we do this at the end of the if-else because the (Part x) text is
        # not (yet) removed from the episode details, only made consistent
        elsif ($opt->{debug} && $prog->{'_episode'} =~ m/\b(Part|Pt(\.)?)(\d+|\s+\w+)/i) {
            t("    Possible part numbering still seen: " . $prog->{'_episode'});
            $possible_part_nums{$prog->{'_episode'}} = $prog->{'_episode'};
        }
    }
}

# Extract series/episode numbering found in $prog->{'_sub_title'}. Series
# and episode numbering are parsed out of the text and eventually made
# available in the <episode-num> element, being stored in intermediary
# variables during processing.
#
sub extract_numbering_from_sub_title {
    my $prog = shift;

    if (defined $prog->{'_sub_title'}
             && $prog->{'_sub_title'} =~ m/\d+|series|episode/i) {

        # ) check for most common "x/y, series z" format first
        #
        # "1/6, series 1"
        #
        if ($prog->{'_sub_title'} =~
            s{
              ^                       # start at beginning of sub_title details
              (\d+)                   # CAPTURE the first number(s) found ($episode_num)
              \/                      # forward slash
              (\d+)                   # CAPTURE the second number(s) found ($num_episodes)
              ,                       # comma
              \s                      # whitespace
              series\s(\d+)           # check for series number information ($series_num)
              $                       # stop at end of sub_title details
            }
            {}ix ) {
                $prog->{'_episode_num'} = $1 - 1;
                # Check that source episode number is not greater than number of episodes
                # Rather than discard the episode number, we discard the total instead which
                # is more likely to be incorrect based on observation.
                if ($1 <= $2) {
                    $prog->{'_num_episodes'} = $2;
                    t("    Episode number/total found: episode $1 of $2 (subtitle: x/y, series z)");
                }
                else {
                    t("    Bad episode total found: episode $1 of $2, discarding total (subtitle: x/y, series z)");
                }
                t("    Series number found: series $3 (subtitle: x/y, series z)");
                $prog->{'_series_num'} = $3 - 1;
                $prog->{'_processed'} = 1;
        }

        # ) check for "x/y" formats covering other formats
        #
        # "1"
        # "1/6"
        # "1, series 1"
        # "1/6, series 1"
        # "1`/3, series 1"
        # "1&2/6, series 1" - second episode unused
        # "7&1, series 1&2" - second episode and series unused
        # "1&2"
        # "1 and 2/6, series 1"
        # "1A/6, series 1" - episode "part A or B" currently unused
        #
        elsif ($prog->{'_sub_title'} =~
            s{
              ^                       # start at beginning of sub_title details
              (\d+)                   # CAPTURE the first number(s) found ($episode_num)
              `?                      # ignore any erroneous chars
              (?:A|B)?                # ignore optional episode "part"
              \s*                     # ignore any whitespace
              (?:(?:&|and)\s*\d+)?    # check for "&2", "and 2" details relating to following episode
              \s*                     # ignore any whitespace
              \/?                     # forward slash
              \s*                     # ignore any whitespace
              (\d+)?                  # CAPTURE the second number(s) found ($num_episodes)
              \s*                     # ignore any whitespace
              (?:,)?                  # check for punctuation characters
              \s*                     # ignore any whitespace
              (?:series\s*(\d+)(?:&\d+)?)?  # check for series number information ($series_num)
              \s*                     # ignore any whitespace
              $                       # stop at end of sub_title details
            }
            {}ix ) {
                $prog->{'_episode_num'} = $1 - 1;
                # Check that source episode number is not greater than number of episodes
                # Rather than discard the episode number, we discard the total instead which
                # is more likely to be incorrect based on observation.
                if (defined $2) {
                    if ($1 <= $2) {
                        $prog->{'_num_episodes'} = $2;
                        t("    Episode number/total found: episode $1 of $2 (subtitle, x/y)");
                    }
                    else {
                        t("    Bad episode total found: episode $1 of $2, discarding total (subtitle, x/y)");
                    }
                }
                else {
                    t("    Episode number found: episode $1 (subtitle, x/y)");
                }
                if (defined $3) {
                    t("    Series number found: series $3 (subtitle, x/y)");
                    $prog->{'_series_num'} = $3 - 1;
                }
                $prog->{'_processed'} = 1;
        }

        # ) check for special case of "x/y/z, series n" format where two parts of a series have
        # been edited into a single programme for transmission. Only the first episode number
        # given is output in episode-num
        #
        # "1/2/6, series 1"
        #
        elsif ($prog->{'_sub_title'} =~
            s{
              ^                       # start at beginning of sub_title details
              (\d+)                   # CAPTURE the first number(s) found ($episode_num)
              \s*                     # ignore any whitespace
              \/                      # forward slash
              \s*                     # ignore any whitespace
              (\d+)                   # CAPTURE the second number(s) found (unused at present)
              \s*                     # ignore any whitespace
              \/                      # forward slash
              \s*                     # ignore any whitespace
              (\d+)                   # CAPTURE the third number(s) found ($num_episodes)
              \s*                     # ignore any whitespace
              ,                       # check for punctuation characters
              \s*                     # ignore any whitespace
              series\s*(\d+)          # check for series number information ($series_num)
              \s*                     # ignore any whitespace
              $                       # stop at end of sub_title details
            }
            {}ix ) {
                $prog->{'_episode_num'} = $1 - 1;
                # Check that source episode number is not greater than number of episodes
                # Rather than discard the episode number, we discard the total instead which
                # is more likely to be incorrect based on observation.
                if (defined $3) {
                    if ($1 <= $3) {
                        $prog->{'_num_episodes'} = $3;
                        t("    Episode number/total found: episode $1 of $3 (subtitle, x/y/z. series n)");
                    }
                    else {
                        t("    Bad episode total found: episode $1 of $3, discarding total (subtitle, x/y/z, series n)");
                    }
                }
                if (defined $4) {
                    t("    Series number found: series $4 (subtitle, x/y/z, series n)");
                    $prog->{'_series_num'} = $4 - 1;
                }
                $prog->{'_processed'} = 1;
        }

        # ) check for "Series x" format covering following formats:
        #
        # "Series 1"
        #
        elsif ($prog->{'_sub_title'} =~
            s{
              ^                      # start at beginning of sub_title details
              (?:Series)             # ignore "Series" text
              \s*                    # ignore any whitespace
              (\d+)                  # CAPTURE the first number(s) found ($series_num)
              $                      # finish at end of sub_title details
            }
            {}ix ) {
                if (defined $1) {
                    t("    Series number found: series $1 (subtitle, series x)");
                    $prog->{'_series_num'} = $1 - 1;
                }
                $prog->{'_processed'} = 1;
        }

        # ) check for "Series  " format where series number is missing. Here
        # we remove the text from the sub_title field
        #
        elsif ($prog->{'_sub_title'} =~
            s{
              ^                      # start at beginning of sub_title details
              Series                 # "Series" text
              \s*                    # ignore any whitespace
              $                      # finish at end of sub_title details
            }
            {}ix ) {
                t("    Missing series number found (subtitle, series)");
        }

        # ) check for "Episode x" format covering following formats:
        #
        # "Episode 1"
        # "Episode one"
        #
        elsif ($prog->{'_sub_title'} =~
            s{
              ^                      # start at beginning of sub_title details
              (?:Episode|Ep|Epiosde) # ignore "Episode" text
              \s*                    # ignore any whitespace
              (\w+|\d+)              # CAPTURE the first number(s) found ($episode_num)
              $                      # finish at end of sub_title details
            }
            {}ix ) {
                my $digits = word_to_digit($1);
                if (defined $digits and $digits > 0) {
                    t("    Episode number found: episode $digits (parsed as $1, subtitle, episode x)");
                    $prog->{'_episode_num'} = $digits - 1;
                }
                $prog->{'_processed'} = 1;
        }
    }
}

sub config_stage {
    my ( $stage, $conf ) = @_;

    # Update encoding if seen in new-style config file
    if (defined( $conf->{encoding} )) {
        $xml_encoding = $conf->{encoding}[0];
    }

    my $result;
    my $writer = new XMLTV::Configure::Writer( OUTPUT => \$result,
                                               encoding => $xml_encoding );

    $writer->start( { grabber => "$grabber_name" } );

    if ($stage eq 'start') {

        $writer->start_selectone( {
            id => 'encoding',
            title => [ [ 'Encoding', 'en' ] ],
            description => [
                [ "Select which output format to use",
                'en' ] ],
        } );
        $writer->write_option( {
            value => 'utf-8',
            text => [ [ 'UTF-8 (Unicode)', 'en' ] ],
        } );
        $writer->write_option( {
            value => 'iso-8859-1',
            text => [ [ 'ISO-8859-1 (Latin-1)', 'en' ] ],
        } );
        $writer->end_selectone();

        $writer->end('select-cachedir');
    }
    elsif ($stage eq 'select-cachedir') {
        $writer->write_string( {
            id => 'cachedir',
            title => [ [ 'Enter the directory to store the listings cache in', 'en' ] ],
            description => [
                [ "$grabber_name uses a cache with files that it has already " .
                "downloaded. Please specify where the cache shall be stored.",
                'en' ] ],
            default => $default_cachedir,
        } );
        $writer->end('select-title-processing');
    }
    elsif ($stage eq 'select-title-processing') {

        $writer->start_selectone( {
            id => 'title-processing',
            title => [ [ 'Enable title processing?', 'en' ] ],
            description => [
                [ "In a bid to provide more consistent listings data, $grabber_name " .
                "can further process programme and episode titles.",
                'en' ] ],
        } );
        $writer->write_option( {
            value => 'enabled',
            text => [ [ 'Enable title processing', 'en' ] ],
        } );
        $writer->write_option( {
            value => 'disabled',
            text => [ [ 'Disable title processing', 'en' ] ],
        } );
        $writer->end_selectone();

        $writer->end('select-utf8-fixups');
    }
    elsif ($stage eq 'select-utf8-fixups') {

        $writer->start_selectone( {
            id => 'utf8-fixups',
            title => [ [ 'Enable UTF-8 fixups?', 'en' ] ],
            description => [
                [ "The source data can be processed to detect and correct any mis-encoded " .
                "UTF-8 characters. Although such errors are rare, it is recommended to " .
                "enable this option.",
                'en' ] ],
        } );
        $writer->write_option( {
            value => 'enabled',
            text => [ [ 'Enable UTF-8 fixups', 'en' ] ],
        } );
        $writer->write_option( {
            value => 'disabled',
            text => [ [ 'Disable UTF-8 fixups', 'en' ] ],
        } );
        $writer->end_selectone();

        $writer->end('ask-autoconfig');
    }
    elsif ($stage eq 'ask-autoconfig') {

        my $use_lineups = ask_boolean('Do you want channels auto-configured based on your chosen platform?', 1);
        if ($use_lineups) {
            $writer->end('select-country');
        }
        else {
            $writer->end('select-channels');
        }
    }
    elsif ($stage eq 'select-country') {

        $writer->start_selectone( {
            id => 'country',
            title => [ [ 'Choose your location', 'en' ] ],
            description => [
                [ "$grabber_name can use your location in " .
                "order to determine which national channels to " .
                "receive listings for.",
                'en' ] ],
        } );
        $writer->write_option( {
            value => 'England',
            text => [ [ 'England', 'en' ] ],
        } );
        $writer->write_option( {
            value => 'Scotland',
            text => [ [ 'Scotland', 'en' ] ],
        } );
        $writer->write_option( {
            value => 'Wales',
            text => [ [ 'Wales', 'en' ] ],
        } );
        $writer->write_option( {
            value => 'Northern Ireland',
            text => [ [ 'Northern Ireland', 'en' ] ],
        } );
        $writer->write_option( {
            value => 'Ireland',
            text => [ [ 'Republic of Ireland', 'en' ] ],
        } );
        $writer->end_selectone();

        $writer->end('select-postcode');
    }
    elsif ($stage eq 'select-postcode') {

        my $country = $conf->{'country'}[0];
        if ($country =~ m/^Ireland$/i) {
            $writer->end('select-lineup');
        }
        else {
            $writer->write_string( {
                id => 'postcode',
                title => [ [ 'Enter the first part of your postcode', 'en' ] ],
                description => [
                    [ "$grabber_name can use the first part of your postcode in " .
                    "order to determine which regional channels to receive listings for",
                    'en' ] ],
                default => 'W12',
            } );
            $writer->end('select-lineup');
        }
    }
    elsif ($stage eq 'select-lineup') {

        my $lineups_doc = parse_lineup_xml_doc( 'lineups.xml' );
        my $ns = $lineups_doc->find( "//xmltv-lineup" );

        LINEUP:
        foreach my $lineup ($ns->get_nodelist) {

            # filter available lineups by country
            if ($lineup->findnodes( "availability[\@area='country']" )) {
                my $country = $conf->{'country'}[0];
                my $path = "availability[\@area='country'][.='$country']";
                if (! $lineup->findnodes( $path ) ) {
                    remove_node($lineup);
                    next LINEUP;
                }
            }
            else {
                # include the lineup if no specific availability given
            }
        }

        # refresh list
        $ns = $lineups_doc->find( "//xmltv-lineup" );

        $writer->start_selectone( {
            id => 'lineup',
            title => [ [ 'Select which TV platform you use', 'en' ] ],
            description => [
                [ "When choosing which channels to download listings for, $grabber_name " .
                "can show only those channels available on your TV platform.",
                'en' ] ],
        } );

        LINEUP:
        foreach my $lineup ($ns->get_nodelist) {

            my $id   = $lineup->findvalue( '@id' );
            my $dn   = $lineup->findvalue( 'display-name' );
            my $type = $lineup->findvalue( 'type' );

            $writer->write_option( {
                value => $id,
                text  => [ [ "$dn ($type)", 'en' ] ],
            } );
        }

        $writer->end_selectone();

        $writer->end( 'select-packages' );
    }
    #FIXME
    #filter by country/postcode before determining available packages
    elsif ($stage eq 'select-packages') {

        my $lineup = $conf->{lineup}[0];
        my $lineup_doc = parse_lineup_xml_doc("$lineup.xml");

        # First check for any basic packages that should be chosen first.
        # These can be FTA/FTV channels for channels available on a platform
        # without a subscription, or the basic package requried for a
        # subscription platform like Virgin TV.
        my $basic_nodes = $lineup_doc->find( "//lineup-entry/package[\@type='basic']" );

        # We include FTA/FTV channels by default and only ask a
        # user to choose a basic subscription package
        my %basic_subs_pkgs;
        foreach my $package_node ($basic_nodes->get_nodelist) {

            my $id = $package_node->textContent;
            if ($id =~ m/Free-to-air/i) {
            }
            elsif ($id =~ m/Free-to-view/i) {
            }
            else {
                $basic_subs_pkgs{$id} = $id;
            }
        }
        if (%basic_subs_pkgs) {
            $writer->start_selectone( {
                id => 'basic-package',
                title => [ [ 'Select which basic subscription package you have', 'en' ] ],
                description => [
                    [ "Please choose from one of the following basic subscription packages",
                    'en' ] ],
            } );

            foreach my $package (keys %basic_subs_pkgs) {
                $writer->write_option( {
                    value => $package,
                    text  => [ [ "$package", 'en' ] ],
                } );
            }

            $writer->end_selectone();
        }

        # Check for any available premium subscription packages
        my $premium_sub_nodes = $lineup_doc->find( "//lineup-entry/package[\@type='subscription'][. != 'Premium']" );
        my %premium_subs_pkgs;
        foreach my $package_node ($premium_sub_nodes->get_nodelist) {
            my $id = $package_node->textContent;
            if ($id =~ m/Pay-per-view/i) { # Ignore PPV channels
            }
            else {
                $premium_subs_pkgs{$id} = $id;
            }
        }
        if (%premium_subs_pkgs) {
            $writer->start_selectmany( {
                id => 'subscription-package',
                title => [ [ 'Select which packages you subscribe to', 'en' ] ],
                description => [
                    [ "Please choose from the following subscription packagess",
                    'en' ] ],
            } );

            foreach my $package (sort keys %premium_subs_pkgs) {
                $writer->write_option( {
                    value => $package,
                    text  => [ [ "$package", 'en' ] ],
                } );
            }

            $writer->end_selectmany();
        }


        # Check for any individual premium channels
        my $premium_chan_name_nodes = $lineup_doc->find( "//lineup-entry/package[\@type='subscription'][.='Premium']/../station/name" );
        my %premium_channels;
        foreach my $name_node ($premium_chan_name_nodes->get_nodelist) {
            my $name = $name_node->textContent;
            $premium_channels{$name} = $name;
        }
        if (%premium_channels) {
            $writer->start_selectmany( {
                id => 'premium-channel',
                title => [ [ 'Select which premium channels you subscribe to', 'en' ] ],
                description => [
                    [ "Please choose from the following premium channels",
                    'en' ] ],
            } );

            foreach my $channel (sort keys %premium_channels) {
                $writer->write_option( {
                    value => $channel,
                    text  => [ [ "$channel", 'en' ] ],
                } );
            }

            $writer->end_selectmany();
        }

        #FIXME
        # currently we ignore PPV channels during config as listings for these
        # are not available. We can still output them during get-lineup though.

        $writer->end( 'select-full-lineups' );
    }
    elsif ($stage eq 'select-full-lineups') {

        $writer->start_selectone( {
            id => 'full-lineups',
            title => [ [ 'Include unsupported channels in lineup?', 'en' ] ],
            description => [
                [ "When generating a lineup you " .
                "can choose to include channels which are not currently " .
                "supported with listings. You should choose this option " .
                "if you can receive listings for these channels from elsewhere " .
                "(e.g. EIT) and/or want the full channel lineup available.",
                'en' ] ],
        } );
        $writer->write_option( {
            value => 'enabled',
            text => [ [ 'Include unsupported channels', 'en' ] ],
        } );
        $writer->write_option( {
            value => 'disabled',
            text => [ [ 'Exclude unsupported channels', 'en' ] ],
        } );
        $writer->end_selectone();

        # The select-channels stage must be the last stage called
        $writer->end('select-channels');
    }
    else {
        die "Unknown stage $stage";
    }

    return $result;
}

sub list_channels {
    my ( $conf, $opt ) = @_;

    # Update encoding if seen in new-style config file
    if (exists $conf->{encoding}) {
        $xml_encoding = $conf->{encoding}[0];
    }

    my $channels = load_available_channels($conf, $opt);

    # only filter available channels if we see a lineup entry
    if (exists $conf->{lineup})  {
        my $lineup = $conf->{lineup}[0];
        my $doc = parse_lineup_xml_doc("$lineup.xml");
        $channels = get_supported_lineup_channels($conf, $opt, $doc, $channels);
    }

    my $result = "";
    my $fh = new IO::Scalar \$result;
    my $oldfh = select( $fh );

    my %g_args = (OUTPUT => $fh);

    # Write XMLTV to $result, rather than STDOUT
    my $writer = new XMLTV::Writer(%g_args, encoding => $xml_encoding);
    $writer->start(\%tv_attributes);

    my $sorted_channels = sort_wanted_channels_by_name($channels);

    foreach my $channel (@{$sorted_channels}) {
        delete $channel->{'rt_id'};
        $writer->write_channel($channel);
    }

    $writer->end;
    select( $oldfh );
    $fh->close();

    return $result;
}

sub list_lineups {

    my $doc = parse_lineup_xml_doc( 'lineups.xml' );
    return $doc->toString();
}

#FIXME - option to include all channels inc supported
sub get_lineup {

    my $conf = shift;
    my $opt = shift;

    if (! exists $conf->{'lineup'}) {
        die "Error: No lineup is configured";
    }

    my $lineup = $conf->{'lineup'}[0];
    my $unfiltered_lineup_doc = parse_lineup_xml_doc( "$lineup.xml" );
    my $filtered_lineup_doc = filter_channels($conf, $opt, $unfiltered_lineup_doc);

    return pretty_print_xml($filtered_lineup_doc);
}

# Retrieve a given lineup XML file via XMLTV::Supplement and return
# a reference to the parsed XML document
#
sub parse_lineup_xml_doc {

    my $file = shift;
    my $string = GetSupplement("$grabber_name/lineups", "$file");
    die "Error: XML lineup document 'lineups/$file' is missing or empty, cannot continue"
            if (! defined $string || $string eq '');

    my $xml = XML::LibXML->new;
    my $doc;
    eval { $doc = $xml->parse_string( $string ) };
    die "Error: Could not parse XML lineup document 'lineups/$file'" if ($@);

    return $doc;
}

sub get_wanted_channels_aref {

    my $conf = shift;
    my $opt = shift;
    my $available_channels = shift;

    my $lineup;
    if (exists $conf->{lineup}) {
        $lineup = $conf->{lineup}[0];
        my $doc = parse_lineup_xml_doc("$lineup.xml");
        $available_channels = get_supported_lineup_channels($conf, $opt, $doc, $available_channels);
    }
    else {
        $available_channels = get_supported_config_channels($conf, $opt, $available_channels);
    }

    return sort_wanted_channels_by_name($available_channels);
}

# Parse an XML lineup document and extract a list of channels supported by the
# grabber. Return a hashref of the available channels.
sub get_supported_lineup_channels {

    my $conf = shift;
    my $opt = shift;
    my $unfiltered_lineup_doc = shift;
    my $xmltv_channels_href = shift;

    my $filtered_lineup_doc = filter_channels($conf, $opt, $unfiltered_lineup_doc);
    my @stations = $filtered_lineup_doc->findnodes( "//station" );

    LINEUP_STATION:
    foreach my $station (@stations) {
        my $id = $station->findvalue( '@rfc2838' );
        if (exists $xmltv_channels_href->{$id}) {
            if ($opt->{'debug'}) {
                say("    Channel '$id' is available in the grabber");
            }
            $xmltv_channels_href->{$id}{'_matched'} = 1;
            next LINEUP_STATION;
        }
    }

    # remove any channels not flagged
    foreach my $id (keys %{$xmltv_channels_href}) {
        unless (exists $xmltv_channels_href->{$id}{'_matched'}) {
            delete $xmltv_channels_href->{$id};
        }
    }

    if ($opt->{'debug'}) {
        say("  A total of " . scalar (keys %{$xmltv_channels_href})
                . " lineup entries are supported by the grabber");
    }
    return $xmltv_channels_href;
}

# Iterate over the lineup document and remove any channels that are unavailable
# in the configured country and postcode, or unsupported by the grabber (i.e. no
# associated XMLTV ID). Return the document containing the remaining lineup entries.
sub filter_channels {

    my $conf = shift;
    my $opt = shift;
    my $lineup_doc = shift;

    $lineup_doc = filter_channels_by_location($conf, $opt, $lineup_doc);
    $lineup_doc = filter_channels_by_package($conf, $opt, $lineup_doc);

    # By default, generate a full lineup including unsupported channels. Only
    # remove unsupported channels if explicitly configured
    if (exists $conf->{'full-lineups'} && $conf->{'full-lineups'}[0] eq 'disabled') {
        $lineup_doc = filter_channels_by_xmltv_support($conf, $opt, $lineup_doc);
    }

    # remove nodes used for filtering purposes above from final lineup
    $lineup_doc = remove_filter_ndoes_from_lineup($conf, $opt, $lineup_doc);

    return $lineup_doc;
}

sub filter_channels_by_location {

    my $conf = shift;
    my $opt = shift;
    my $lineup_doc = shift;

    my @entries = $lineup_doc->findnodes( "//lineup-entry" );

    LINEUP_ENTRY:
    foreach my $entry (@entries) {
        # keep channels that do not have availability information
        if (! $entry->exists( "availability" )) {
            next LINEUP_ENTRY;
        }

        my $matched; # have we matched this channel against our configured location?

        # keep channels matched by configured postcode
        my $user_postcode = $conf->{'postcode'}[0];
        if (defined $user_postcode && lc $user_postcode ne 'none') {
            my @area_postcodes = $entry->findnodes( "availability[\@area='postcode']" );
            foreach my $postcode_node (@area_postcodes) {

                my $value = $postcode_node->textContent;
                my @postcodes = split /,/, $value;
                foreach my $chan_postcode (@postcodes) {
                    if (lc $chan_postcode eq lc $user_postcode) {
                        $matched++;
                    }
                }
            }
        }

        # keep channels that are matched by country
        my $user_country = $conf->{'country'}[0];
        if (defined $user_country && lc $user_country ne 'none') {
            my @area_countries = $entry->findnodes( "availability[\@area='country']" );
            foreach my $country_node (@area_countries) {

                my $value = $country_node->textContent;
                my @countries = split /,/, $value;
                foreach my $chan_country (@countries) {
                    if (lc $chan_country eq lc $user_country) {
                        $matched++;
                    }
                }
            }
        }

        # remove this channel if we haven't matched it
        remove_node($entry) unless $matched;
    }

    return $lineup_doc;
}

sub filter_channels_by_package {

    my $conf = shift;
    my $opt = shift;
    my $lineup_doc = shift;

    my @entries = $lineup_doc->findnodes( "//lineup-entry" );
    my $conf_basic_pkg = $conf->{'basic-package'}[0];

    LINEUP_ENTRY:
    foreach my $entry (@entries) {

        my $matched; # have we matched this channel against our configured packages?

        # FTA/FTV and basic packages
        my @basic_pkg_nodes = $entry->findnodes( "package[\@type='basic']" );
        PACKAGE:
        foreach my $pkg_node (@basic_pkg_nodes) {
            my $pkg = $pkg_node->textContent;
            if ($pkg =~ m/Free-to-air/i) {
                $matched = 1;
                last PACKAGE;
            }
            elsif ($pkg =~ m/Free-to-view/i) {
                $matched = 1;
                last PACKAGE;
            }
            # not a FTA/FTV basic package, first look for Virgin TV basic pkgs
            elsif ($pkg =~ m/^(M|M\+|L|XL)$/i) {
                if ($conf_basic_pkg eq 'XL') {
                    $matched = 1;
                    last PACKAGE;
                }
                if ($conf_basic_pkg eq 'L'  && $pkg =~ m/^(M|M\+|L)$/i) {
                    $matched = 1;
                    last PACKAGE;
                }
                if ($conf_basic_pkg eq 'M+' && $pkg =~ m/^(M|M\+)$/i) {
                    $matched = 1;
                    last PACKAGE;
                }
                if ($conf_basic_pkg eq 'M'  && $pkg =~ m/^M$/i) {
                    $matched = 1;
                    last PACKAGE;
                }
            }
            # next look for UPC Ireland basic pkgs
            elsif ($pkg =~ m/^(Value|Select|Select Extra|Max)$/i) {
                if ($conf_basic_pkg eq 'Max') {
                    $matched = 1;
                    last PACKAGE;
                }
                if ($conf_basic_pkg eq 'Select Extra' && $pkg =~ m/^(Value|Select|Select Extra)$/i) {
                    $matched = 1;
                    last PACKAGE;
                }
                if ($conf_basic_pkg eq 'Select' && $pkg =~ m/^(Value|Select)$/i) {
                    $matched = 1;
                    last PACKAGE;
                }
                if ($conf_basic_pkg eq 'Value' && $pkg =~ m/^Value$/i) {
                    $matched = 1;
                    last PACKAGE;
                }
            }
            else {
                foreach my $conf_pkg (@{$conf->{'basic-package'}}) {
                    if ($pkg eq $conf_pkg) {
                        $matched = 1;
                        last PACKAGE;
                    }
                }
            }
        }

        # Subscription packages
        my @subs_pkg_nodes = $entry->findnodes( "package[\@type='subscription'][. != 'Premium']" );
        PACKAGE:
        foreach my $pkg_node (@subs_pkg_nodes) {
            my $pkg = $pkg_node->textContent;
            foreach my $conf_pkg (@{$conf->{'subscription-package'}}) {
                if ($pkg eq $conf_pkg) {
                    $matched = 1;
                    last PACKAGE;
                }
            }
        }

        # Premium channels
        my @prem_chan_name_nodes = $entry->findnodes( "package[\@type='subscription'][.='Premium']/../station/name" );
        CHANNEL:
        foreach my $name_node (@prem_chan_name_nodes) {
            my $name = $name_node->textContent;
            foreach my $conf_name (@{$conf->{'premium-channel'}}) {
                if ($name eq $conf_name) {
                    $matched = 1;
                    last CHANNEL;
                }
            }
        }

        # remove this channel if we haven't matched it
        remove_node($entry) unless $matched;
    }

    return $lineup_doc;
}

sub filter_channels_by_xmltv_support {

    my $conf = shift;
    my $opt = shift;
    my $lineup_doc = shift;

    my @entries = $lineup_doc->findnodes( "//lineup-entry" );

    LINEUP_ENTRY:
    foreach my $entry (@entries) {
        my $dn = $entry->findvalue( 'station/name' );

        if (! $entry->exists( "station[\@rfc2838]" ) ) {
            remove_node($entry);
            next LINEUP_ENTRY;
        }
        if ($entry->exists( "station[\@rfc2838='unknown']" ) ) {
            remove_node($entry);
            next LINEUP_ENTRY;
        }
    }

    return $lineup_doc;
}

sub remove_filter_ndoes_from_lineup {

    my $conf = shift;
    my $opt = shift;
    my $lineup_doc = shift;

    my @availability_nodes = $lineup_doc->findnodes( "//availability" );
    foreach my $node (@availability_nodes) {
        remove_node($node);
    }
    my @package_nodes = $lineup_doc->findnodes( "//package" );
    foreach my $node (@package_nodes) {
        remove_node($node);
    }

    return $lineup_doc;
}

sub get_supported_config_channels {

    my $conf = shift;
    my $opt = shift;
    my $xmltv_channels_href = shift;

    CONFIG_CHANNEL:
    foreach my $chan_id (@{$conf->{'channel'}}) {
        t("  Read channel '$chan_id'");
        if (! exists $xmltv_channels_href->{$chan_id}) {
            if (! $opt->{'quiet'}) {
                say("  Configured channel '$chan_id' is unavailable");
            }
            next CONFIG_CHANNEL;
        }
        if ($opt->{'debug'}) {
            say("    Channel '$chan_id' is available in the grabber");
        }
        $xmltv_channels_href->{$chan_id}{'_matched'} = 1;
        next CONFIG_CHANNEL;
    }

    # remove any channels not flagged
    foreach my $id (keys %{$xmltv_channels_href}) {
        unless (exists $xmltv_channels_href->{$id}{'_matched'}) {
            delete $xmltv_channels_href->{$id};
        }
    }

    if ($opt->{'debug'}) {
        say("  A total of " . scalar (keys %{$xmltv_channels_href})
                . " config file channels are supported by the grabber");
    }
    return $xmltv_channels_href;
}

# Take a hashref of configured channel hashes and return a listref of channel
# hashes sorted in ascending display name order
sub sort_wanted_channels_by_name {

    my $channels = shift;
    my $sorted_channels = [];
    my %chan_id_to_name;

    # Only add the non-RT sourced timeshifted channels during configuration,
    # otherwise the configuration could include both Radio Times-sourced
    # timeshifted data, and the timeshifted data we create internally from a
    #regular +0 channel
    foreach my $chan_id (keys %{$channels}) {
        my $chan_name = $channels->{$chan_id}{'display-name'}[0][0];
        if ($chan_name !~ m/\(RT\)$/) {
            $chan_id_to_name{$chan_id} = $chan_name;
        }
    }

    # Create a sorted list of xmltv_ids in ascending order of the
    # corresponding display name (case-insensitive)
    my @sorted_chan_ids = sort {uc($chan_id_to_name{$a}) cmp uc($chan_id_to_name{$b})}
                               keys %chan_id_to_name;

    foreach my $chan_id (@sorted_chan_ids) {
        push @{$sorted_channels}, $channels->{$chan_id};
    }

    return $sorted_channels;
}

# Remove the given node object from an XML document
sub remove_node {

    my $node = shift;
    my $parent = $node->parentNode;
    $parent->removeChild($node);
}

# Prettify XML output. Required when the XML document structure has been
# modified, as removing elements from a parsed XML file will leave line breaks
# in the toString output
sub pretty_print_xml {

    my $doc = shift;
    my $doc_string = $doc->toString();
    $doc_string =~ s/>\s+</></g;

    my $xml = XML::LibXML->new;
    eval { $doc = $xml->parse_string( $doc_string ) };
    die "Error: Could not parse XML string" if ($@);

    return $doc->toString(1);
}

###############################################
############# DEBUG SUBROUTINES ###############
###############################################

sub t {
    my ($message) = @_;
    if ($opt->{debug}) {
        print STDERR $message . "\n";
    }
}

sub print_titles_with_colons {
    if (%prog_titles) {
        my @titles_colons;
        my %precolon;  # store the title elements that appear before and
        my %postcolon; # after the first colon with the full title
        foreach my $title (sort keys %prog_titles) {
            if ($title =~ m/^([^:]+)\s*:\s*(.*)$/) {
                push @titles_colons, $title;
                push @{$precolon{$1}}, $title;
                push @{$postcolon{$2}}, $title;
            }
        }

        if (@titles_colons) {
            say("\nStart of list of titles containing colons");
            say("  " . $_) foreach @titles_colons;

            # now store the possible fixups if we see more than 1 title having
            # common pre/post colon text.
            my @prefixups;
            foreach my $text (sort keys %precolon) {
                if (@{$precolon{$text}} > 1) {
                    push @prefixups, "2|" . $text
                }
            }
            if (@prefixups) {
                say("\nPossible fixups for title:episode :\n");
                say($_) foreach sort @prefixups;
                say("");
            }

            my @postfixups;
            foreach my $text (sort keys %postcolon) {
                if (@{$postcolon{$text}} > 1) {
                    push @postfixups, "3|" . $text
                }
            }
            if (@postfixups) {
                say("\nPossible fixups for episode:title :\n");
                say($_) foreach sort @postfixups;
                say("");
            }
            say("End of list of titles containing colons");
        }
    }
}

sub print_titles_with_hyphens {
    if (%prog_titles) {
        my @titles_hyphens;
        my @fixups;
        foreach my $title (sort keys %prog_titles) {
            if ($title =~ m/\s+-\s+/) {
                push @titles_hyphens, $title;
                my $idx_hyphen = index($title, "-");
                my $idx_colon = index($title, ":"); # -1 = no colon
                # Do not suggest title fixup if colon precedes hyphen
                if ($idx_colon == -1 || $idx_hyphen < $idx_colon) {
                    my $rec_title = $title;
                    $rec_title =~ s/\s+-\s+/: /;
                    push @fixups, "5|" . $title . "~" . $rec_title;
                }
            }
        }

        if (@titles_hyphens) {
            say("\nStart of list of titles containing hyphens");
            say("  " . $_) foreach @titles_hyphens;

            if (@fixups) {
                say("\nPossible fixups for hyphenated titles:\n");
                say($_) foreach sort @fixups;
                say("");
            }
            say("End of list of titles containing hyphens");
        }
    }
}

sub print_new_titles {
    if (%prog_titles) {
        my @titles_special;
        foreach my $title (sort keys %prog_titles) {
            push(@titles_special, $title) if ($title =~ m/Special\b/i);
        }
        if (@titles_special) {
            say("\nStart of list of titles containing \"Special\"");
            say("  " . $_) foreach @titles_special;
            say("End of list of titles containing \"Special\"");
        }

        my @titles_new;
        my @titles_premiere;
        my @titles_finale;
        my @titles_anniv;
        foreach my $title (sort keys %prog_titles) {
            push(@titles_new, $title) if ($title =~ m/^(All New|New)\b/i);
            push(@titles_premiere, $title) if ($title =~ m/Premiere\b/i);
            push(@titles_finale, $title) if ($title =~ m/Final\b/i);
            push(@titles_finale, $title) if ($title =~ m/Finale/i);
            push(@titles_anniv, $title) if ($title =~ m/Anniversary/i);
        }
        if (@titles_new || @titles_premiere || @titles_finale || @titles_anniv) {
            say("\nStart of list of titles containing \"New/Premiere/Finale/etc...\"");
            if (@titles_new) {
                say("  " . $_) foreach @titles_new;
                say("");
            }
            if (@titles_premiere) {
                say("  " . $_) foreach @titles_premiere;
                say("");
            }
            if (@titles_finale) {
                say("  " . $_) foreach @titles_finale;
                say("");
            }
            if (@titles_anniv) {
                say("  " . $_) foreach @titles_anniv;
                say("");
            }
            say("End of list of titles containing \"New/Premiere/Finale/etc...\"");
        }

        my @titles_day;
        my @titles_night;
        my @titles_week;
        foreach my $title (sort keys %prog_titles) {
            push(@titles_day, $title) if ($title =~ m/\bDay\b/i);
            push(@titles_night, $title) if ($title =~ m/\bNight\b/i);
            push(@titles_week, $title) if ($title =~ m/\bWeek\b/i);
        }
        if (@titles_day || @titles_night || @titles_week) {
            say("\nStart of list of titles containing \"Day/Night/Week\"");
            if (@titles_day) {
                say("  " . $_) foreach @titles_day;
                say("");
            }
            if (@titles_night) {
                say("  " . $_) foreach @titles_night;
                say("");
            }
            if (@titles_week) {
                say("  " . $_) foreach @titles_week;
                say("");
            }
            say("End of list of titles containing \"Day/Night/Week\"");
        }

        my @titles_christmas;
        my @titles_newyear;
        foreach my $title (sort keys %prog_titles) {
            push(@titles_christmas, $title) if ($title =~ m/\bChristmas\b/i);
            push(@titles_newyear, $title) if ($title =~ m/\bNew\s+Year/i);
        }
        if (@titles_christmas || @titles_newyear) {
            say("\nStart of list of titles containing \"Christmas/New Year\"");
            if (@titles_christmas) {
                say("  " . $_) foreach @titles_christmas;
                say("");
            }
            if (@titles_newyear) {
                say("  " . $_) foreach @titles_newyear;
                say("");
            }
            say("End of list of titles containing \"Christmas/New Year\"");
        }

        my @titles_bestof;
        my @titles_highlights;
        my @titles_results;
        my @titles_top;
        foreach my $title (sort keys %prog_titles) {
            push(@titles_bestof, $title) if ($title =~ m/Best of\b/i);
            push(@titles_highlights, $title) if ($title =~ m/Highlights/i);
            push(@titles_results, $title) if ($title =~ m/Results?/i);
            push(@titles_top, $title) if ($title =~ m/\bTop\b/i);
        }
        if (@titles_bestof || @titles_results || @titles_top) {
            say("\nStart of list of titles containing \"Results/Best of/Highlights/etc...\"");
            if (@titles_bestof) {
                say("  " . $_) foreach @titles_bestof;
                say("");
            }
            if (@titles_highlights) {
                say("  " . $_) foreach @titles_highlights;
                say("");
            }
            if (@titles_results) {
                say("  " . $_) foreach @titles_results;
                say("");
            }
            if (@titles_top) {
                say("  " . $_) foreach @titles_top;
                say("");
            }
            say("End of list of titles containing \"Results/Best of/Highlights/etc...\"");
        }
    }
}

sub print_uc_titles_post {
    if (%prog_titles) {
        my @titles_uc_post;
        foreach my $title (sort keys %prog_titles) {
            if ($title eq uc($title) && $title !~ m/^\d+$/) {
                push @titles_uc_post, $title;
            }
        }
        if (@titles_uc_post) {
            say("\nStart of list of uppercase titles after processing");
            say("  " . $_) foreach @titles_uc_post;
            say("End of list of uppercase titles after processing");
        }
    }
}

sub print_title_variants {
    if (%prog_titles) {
        # iterate over each unique "normalised" title
        my @titles_variants;
        my @fixups;
        foreach my $unique_title (sort keys %case_insens_titles) {
            if (scalar keys %{$case_insens_titles{$unique_title}} > 1) {
                my %variants;

                # iterate over each actual title seen in listings
                foreach my $title (sort keys %{$case_insens_titles{$unique_title}}) {

                    # need to remove 'count' key before genre processing later
                    my $title_cnt = delete $case_insens_titles{$unique_title}{$title}{'count'};
                    # hash lists of title variants keyed on frequency
                    push @{$variants{$title_cnt}}, $title;

                    my $line = "  $title (";
                    # iterate over each title's genres
                    foreach my $genre (sort keys %{$case_insens_titles{$unique_title}{$title}}) {
                        # iterate over each title's channel availability by genre
                        foreach my $chan (sort keys %{$case_insens_titles{$unique_title}{$title}{$genre}}) {
                            $line .= $genre . "/" . $chan . " [" . $case_insens_titles{$unique_title}{$title}{$genre}{$chan} . " occurences], ";
                        }
                    }
                    $line =~ s/,\s*$//; # remove last comma
                    $line .= ")";
                    push @titles_variants, $line;
                }
                push @titles_variants, "";

                # now find list of titles with highest freq and check if it contains
                # a single entry to use in suggested fixups
                my @title_freqs = sort {$a <=> $b} keys %variants;
                my $highest_freq = $title_freqs[-1];
                my $best_title;
                if (@{$variants{$highest_freq}} == 1) {
                    # grab the title and remove key from $case_insens_titles{$unique_title}
                    $best_title = shift @{$variants{$highest_freq}};
                    delete $case_insens_titles{$unique_title}{$best_title};
                    # now iterate over remaining variations of title and generate fixups
                    foreach my $rem_title (keys %{$case_insens_titles{$unique_title}}) {
                        push @fixups, "5|" . $rem_title . "~" . $best_title;
                    }
                }
            }
        }
        if (@titles_variants) {
            say("\nStart of possible title variations");
            say("  " . $_) foreach @titles_variants;
            if (@fixups) {
                say("\nPossible fixups for title variations:\n");
                say($_) foreach sort @fixups;
                say("");
            }
            say("End of possible title variations");
        }
    }
}

sub print_titles_inc_years {
    if (%prog_titles) {
        my @titles_years;
        foreach my $title (sort keys %prog_titles) {
            if ($title =~ m/\b(19|20)\d{2}\b/) {
                push @titles_years, $title;
            }
        }
        if (@titles_years) {
            say("\nStart of list of titles including possible years");
            say("  " . $_) foreach @titles_years;
            say("End of list of titles including possible years");
        }
    }
}

sub print_titles_inc_bbfc_certs {
    if (%film_titles) {
        my @titles_certs;
        foreach my $title (sort keys %film_titles) {
            if ($title =~ m/\(U|PG|12A|15|18\)$/) {
                push @titles_certs, $title;
            }
        }
        if (@titles_certs) {
            say("\nStart of list of film titles including possible BBFC certificates");
            say("  " . $_) foreach @titles_certs;
            say("End of list of film titles including possible BBFC certificates");
        }
    }
}

sub print_flagged_title_eps {
    if (%flagged_title_eps && scalar keys %flagged_title_eps > 0) {
        my %titles_to_output; # temp hash to store matches
        foreach my $flagged_title (sort keys %flagged_title_eps) {
            foreach my $title (sort keys %prog_titles) {
                if (lc $flagged_title eq lc $title) {
                    $titles_to_output{$flagged_title} = $flagged_title;
                }
            }
        }
        # only output something if at least 1 matching title
        if (%titles_to_output && scalar keys %titles_to_output > 0) {
            say("\nStart of list of titles that may need fixing individually");
            foreach my $title (sort keys %titles_to_output) {
                say("  $title");
            }
            say("End of list of titles that may need fixing individually");
        }
    }
}

sub print_dotdotdot_titles {
    if (%dotdotdot_titles && scalar keys %dotdotdot_titles > 0) {
        my %titles_to_output; # temp hash to store matches
        if (%prog_titles) {
            DOTDOTDOT_TITLE:
            # In %dotdotdot_titles, the key is the 'normalised' title to match,
            # value is the full title to use in replacement
            foreach my $dotdotdot_title (sort keys %dotdotdot_titles) {
                PROG_TITLE:
                foreach my $title (sort keys %prog_titles) {
                    # ignore title having ellipses already
                    next PROG_TITLE if $title =~ m/.*\.\.\.$/;

                    # Ignore some frequent mismatches
                    next PROG_TITLE if ($title =~ m/Unforgetable/i && $title !~ m/^The Unforgetable/);
                    next PROG_TITLE if ($title =~ m/One/i && $title !~ m/^The One/);

                    if ($title =~ m/\b\Q$dotdotdot_title\E\b/i) {
                        $titles_to_output{$title} = $dotdotdot_titles{$dotdotdot_title};
                    }
                }
            }
        }
        # only output something if at least 1 matching title
        if (%titles_to_output && scalar keys %titles_to_output > 0) {
            say("\nStart of list of potential \"...\" titles that may need fixing individually");
            foreach my $title (sort keys %titles_to_output) {
                say("  Title '$title' may need to be fixed based on fixup '$titles_to_output{$title}'");
            }
            say("End of list of potential \"...\" titles that may need fixing individually");
        }
    }
}

sub print_new_title_in_subtitle {
    if (%new_title_in_subtitle_fixed && scalar keys %new_title_in_subtitle_fixed > 0) {
        say("\nStart of list of programmes where 'New \$title' was removed from sub-title field");
        foreach my $prog_ref (sort keys %new_title_in_subtitle_fixed) {
            say("  $new_title_in_subtitle_fixed{$prog_ref}->{'title'} / $new_title_in_subtitle_fixed{$prog_ref}->{'episode'}");
        }
        say("\nEnd of list of programmes where 'New \$title' was removed from sub-title field");
    }
}

sub print_title_in_subtitle {
    if (%title_ep_in_subtitle_fixed && scalar keys %title_ep_in_subtitle_fixed > 0) {
        say("\nStart of list of programmes where title/ep was removed from sub-title field");
        foreach my $prog_ref (sort keys %title_ep_in_subtitle_fixed) {
            say("  $title_ep_in_subtitle_fixed{$prog_ref}->{'title'} / $title_ep_in_subtitle_fixed{$prog_ref}->{'episode'}");
        }
        say("\nEnd of list of programmes where title/ep was removed from sub-title field");
    }
    if (%title_in_subtitle_fixed && scalar keys %title_in_subtitle_fixed > 0) {
        say("\nStart of list of programmes where title was removed from sub-title field");
        foreach my $prog_ref (sort keys %title_in_subtitle_fixed) {
            say("  $title_in_subtitle_fixed{$prog_ref}->{'title'} / $title_in_subtitle_fixed{$prog_ref}->{'episode'}");
        }
        say("\nEnd of list of programmes where title was removed from sub-title field");
    }
    if (%title_in_subtitle_notfixed && scalar keys %title_in_subtitle_notfixed > 0) {
        say("\nStart of list of programmes where title is still present in sub-title field");
        foreach my $prog_ref (sort keys %title_in_subtitle_notfixed) {
            say("  $title_in_subtitle_notfixed{$prog_ref}->{'title'} / $title_in_subtitle_notfixed{$prog_ref}->{'episode'}");
        }
        say("\nEnd of list of programmes where title is still present in sub-title field");
    }
    if (%colon_in_subtitle && scalar keys %colon_in_subtitle > 0) {
        say("\nStart of list of programmes where sub-title contains colon/hyphen");
        foreach my $prog_ref (sort keys %colon_in_subtitle) {
            say("  $colon_in_subtitle{$prog_ref}->{'title'} / $colon_in_subtitle{$prog_ref}->{'episode'}");
        }
        say("\nEnd of list of programmes where sub-title contains colon/hyphen");
    }
}

sub print_categories {
    if (%categories && scalar keys %categories > 0) {
        say("\nStart of list of programme categories seen");
        foreach my $category (sort keys %categories) {
            say("  $category");
        }
        say("End of list of programme categories seen");
    }
}

sub print_uncategorised_progs {
    if (%uncategorised_progs && scalar keys %uncategorised_progs > 0) {
        say("\nStart of list of uncategorised programmes");
        foreach my $title (sort keys %uncategorised_progs) {
            say("  $title");
        }
        say("End of list of uncategorised programmes");
    }
}

sub print_reality_progs {
    if (%reality_progs && scalar keys %reality_progs > 0) {
        say("\nStart of list of Reality programmes");
        foreach my $title (sort keys %reality_progs) {
            say("  $title");
        }
        say("End of list of Reality programmes");
    }
}

sub print_cats_per_prog {
    if (%cats_per_prog) {
        my @titles_cats;
        my @fixups;
        foreach my $title (sort keys %cats_per_prog) {
            if (scalar keys %{$cats_per_prog{$title}} > 1) {
                push @titles_cats, "  '" . $title . "' is categorised as:";
                my $best_cat_cnt = 1;
                my $best_cat = '';
                foreach my $cat (sort keys %{$cats_per_prog{$title}}) {
                    push @titles_cats, "    $cat (" . $cats_per_prog{$title}{$cat} . " occurences)";
                    if ($cats_per_prog{$title}{$cat} > $best_cat_cnt) {
                        $best_cat = $cat;
                        $best_cat_cnt = $cats_per_prog{$title}{$cat};
                    }
                }
                push @titles_cats, "";
                if ($best_cat_cnt > 1) {
                   push @fixups, "6|" . $title . "~" . $best_cat;
                }
            }
        }
        if (@titles_cats) {
            say("\nStart of programmes with multiple categories");
            say("  " . $_) foreach @titles_cats;
            if (@fixups) {
                say("\nPossible fixups for programme categories:\n");
                say($_) foreach sort @fixups;
                say("");
            }
            say("End of programmes with multiple categories");
        }
    }
}

sub print_short_films {
    if (%short_films && scalar keys %short_films > 0) {
        say("\nStart of list of films shorter than 75 mins");
        foreach my $title (sort keys %short_films) {
            say("  $title");
        }
        say("End of list of films shorter than 75 mins");
    }
}

sub print_possible_prog_numbering {
    if (%possible_series_nums && scalar keys %possible_series_nums > 0) {
        say("\nStart of list of possible series numbering seen in listings");
        foreach my $poss (sort keys %possible_series_nums) {
            say("  $poss");
        }
        say("End of list of possible series numbering seen in listings");
    }
    if (%possible_episode_nums && scalar keys %possible_episode_nums > 0) {
        say("\nStart of list of possible episode numbering seen in listings");
        foreach my $poss (sort keys %possible_episode_nums) {
            say("  $poss");
        }
        say("End of list of possible episode numbering seen in listings");
    }
    if (%possible_part_nums && scalar keys %possible_part_nums > 0) {
        say("\nStart of list of possible part numbering seen in listings");
        foreach my $poss (sort keys %possible_part_nums) {
            say("  $poss");
        }
        say("End of list of possible part numbering seen in listings");
    }
    if (%title_text_to_remove && scalar keys %title_text_to_remove > 0) {
        say("\nStart of list of titles containing \"Season\"");
        foreach my $t (sort keys %title_text_to_remove) {
            say("  $t");
        }
        say("End of list of titles containing \"Season\"");
    }
}

sub print_misencoded_utf8_data {
    if (%hasC27F9Fchars && scalar keys %hasC27F9Fchars > 0) {
        say("\nStart of list of channels containing unhandled bytes in range [C2][7F-9F]");
        foreach my $chan (sort keys %hasC27F9Fchars) {
            say("  $chan ($hasC27F9Fchars{$chan})");
        }
        say("End of list of channels");
    }
    if (%hadEFBFBD && scalar keys %hadEFBFBD > 0) {
        say("\nStart of list of channels containing Unicode Replacement Character");
        foreach my $chan (sort keys %hadEFBFBD) {
            say("  $chan ($hadEFBFBD{$chan})");
        }
        say("End of list of channels");
    }
    if (%hadC3AFC2BFC2BD && scalar keys %hadC3AFC2BFC2BD > 0) {
        say("\nStart of list of channels containing double-encoded Unicode Replacement Character");
        foreach my $chan (sort keys %hadC3AFC2BFC2BD) {
            say("  $chan ($hadC3AFC2BFC2BD{$chan})");
        }
        say("End of list of channels");
    }
}

sub print_empty_listings {
    if (%empty_listings && scalar keys %empty_listings > 0) {
        say("\nStart of list of channels providing no listings");
        foreach my $chan (sort keys %empty_listings) {
            say("  $chan ($empty_listings{$chan})");
        }
        say("End of list of channels providing no listings");
    }
}

sub print_unhandled_credits_roles {
    if (%seen_roles && scalar keys %seen_roles > 0) {
        say("\nStart of list of unhandled credits roles");
        foreach my $role (sort keys %seen_roles) {
            say("  $role");
        }
        say("End of list of unhandled credits roles");
    }
}

__END__

=head1 NAME

tv_grab_uk_rt - Grab TV listings for United Kingdom/Republic of Ireland

=head1 SYNOPSIS

tv_grab_uk_rt --help

tv_grab_uk_rt --version

tv_grab_uk_rt --capabilities

tv_grab_uk_rt --description

tv_grab_uk_rt [--config-file FILE]
              [--days N] [--offset N]
              [--output FILE] [--quiet] [--debug]

tv_grab_uk_rt --configure [--config-file FILE]

tv_grab_uk_rt --configure-api [--stage NAME]
              [--config-file FILE] [--output FILE]

tv_grab_uk_rt --list-channels [--config-file FILE]
              [--output FILE] [--quiet] [--debug]

tv_grab_uk_rt --list-lineups

tv_grab_uk_rt --get-lineup [--config-file FILE]

=head1 DESCRIPTION

Output TV listings in XMLTV format for many channels available in the
United Kingdom and Republic of Ireland.  Source data comes from
machine-readable files made available from the Radio Times website.

=head1 USAGE

First run B<tv_grab_uk_rt --configure> to choose which channels you want to
receive listings for.  Then run B<tv_grab_uk_rt> (with optional arguments) to get
around 14 days of listings for your configured channels.

=head1 OPTIONS

B<--help> Print a help message and exit.

B<--version> Show the versions of the XMLTV libraries, the grabber and of
key modules used for processing listings.

B<--capabilities> Show which capabilities the grabber supports. For more
information, see L<http://wiki.xmltv.org/index.php/XmltvCapabilities>

B<--description> Show a brief description of the grabber.

B<--config-file FILE> Specify the name of the configuration file to use.
If not specified, a default of B<~/.xmltv/tv_grab_uk_rt.conf> is used.  This
is also the default file written by B<--configure> and read when grabbing.

B<--output FILE> When grabbing, write output to FILE rather than to standard
output.

B<--days N> When grabbing, grab N days of data instead of all available.
Supported values are 1-15.

B<--offset N> Start grabbing at today + N days. Supported values are 0-14.

Note that due to the format of the source data, tv_grab_uk_rt always downloads
data for all available days and then filters for days specified with --days and
--offset. Specifying --days and/or --offset in order to speed up downloads or
reduce data transfer will therefore have no effect.

B<--quiet> Suppress all progress messages normally written to standard error.

B<--debug> Provide detailed progress messages to standard error. Due to the
volume of debug information produced, it is not advised to use this option
during normal grabber use.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.

B<--configure> Prompt for which channels/lineup to download listings for,
where to store the cache directory for retrieved listings, what character
encoding to use for output, and whether to enable programme title and bad
character fixups.

B<--list-channels> Outputs an XML document containing all channels
available to the grabber.

B<--list-lineups> Outputs an XML document containing all channel lineups
available to the grabber.

B<--get-lineup> Outputs an XML document containing the configured channel
lineup.

=head1 SOURCE DATA TERMS OF USE

All data is the copyright of the Radio Times and the use of this data is
restricted to personal use only. Commercial use of this data is forbidden.
L<http://www.radiotimes.com/>

In accessing this XML feed, you agree that you will only access its contents
for your own personal and non-commercial use and not for any commercial
or other purposes, including advertising or selling any goods or services,
including any third-party software applications available to the general public.

=head1 CHANNEL LINEUPS

A channel lineup is a list of TV and radio channels that are available in a
particular location on a particular TV platform (e.g. Freeview).

Whilst configurations containing individual "channel=..." entries are still
supported, the grabber allows a user to select their location and TV platform
at configuration time and have their channel lineup generated dynamically
at runtime. This means that if a new channel launches or a channel ceases
broadcasting, an update to the relevant lineup (stored on the XMLTV server)
will result in listings  containing such channel changes without any reconfiguration by the user.

=head1 CHARACTER ENCODING

During configuration, the software asks the user to choose the character
encoding to be used for output. Currently supported encodings are UTF-8 and
ISO-8859-1.

=head1 TITLE PROCESSING

Over time, listings may contain inconsistent programme details, such as
the programme title combined with episode details for some showings of a
programme, but separate for others; or the episode title being given as the
programme title, and the programme title given as the episode title. Some
programme titles may also change slightly over time, or across channels.

Enabling title processing during configuration enables this software to
process programme titles against a list of flagged titles. The
software will correct such programme titles, which in turn should result in
better performance of PVR software which rely on consistent programme data.
Please be aware that enabling title processing will result in the grabber
taking slightly longer to complete its operation due to the extra
processing overhead.

N.B. Please note that title updates can clearly alter programme titles near
to transmission time, and it is therefore quite possible for PVR schedules
to fail if they have been configured using an old title. Whilst care is
taken to ensure title updates are made as far ahead of transmission as
possible, be aware that last minute updates can be made.

=head1 MIS-ENCODED UTF-8 SOURCE DATA

Prior to the transition of the XMLTV service to metabroadcast.com in December
2011, there was an ongoing issue with source data containing mis-encoded UTF-8
characters. Since the transition, the source data should be UTF-8 safe and
automatic processing of the data may not be required.  A configuration option
is provided to permit detection and correction of such character encoding
errors and users are recommended to enable this option during configuration.

=head1 PERFORMANCE

Improvements to date and time handling in the grabber have increased performance
6-7X. Grabbing 14 days of listings with utf-8 and title fixups enabled
should take about 2 seconds per configured channel on a typical machine.

=head1 ERROR HANDLING

tv_grab_uk_rt will only terminate early if it is impossible to continue with grabbing
data. This can be due to a lack of channel configuration data, a bad/missing
configuration file, or filesystem permission problems. Running the grabber in
non-quiet mode should report why the grabber failed.

Non-fatal errors are reported during a grabber run, and can result in listings
for a channel being skipped either in part, or entirely. Progress messages
will state why data is missing when it is possible to do so. A non-zero exit
status will normally be given when the grabber has encountered problems
during listings retrieval.

=head1 ENVIRONMENT VARIABLES

The environment variable HOME can be set to change where the configuration
file is stored. All configuration is stored in $HOME/.xmltv/ by default. On
Windows it might be necessary to set HOME to a pathname containing no spaces.

The environment variable XMLTV_SUPPLEMENT can be set to change where the
supplemental XMLTV files are retrieved from. By default, the file is
retrieved from the XMLTV supplement server. See L<XMLTV::Supplement> for
more information.

If you want the grabber to use customised local copies of the supplemental
files, you should set XMLTV_SUPPLEMENT to the path of the directory containing
a tv_grab_uk_rt/ directory containing the supplement files. For example, if
your local supplement files are stored in /usr/local/share/xmltv/tv_grab_uk_rt/
you should `export XMLTV_SUPPLEMENT="/usr/local/share/xmltv/"` before running the
grabber.

=head1 RADIO LISTINGS

Ironically, the Radio Times feed does not offer listings for radio. They
have been asked about the possibility of adding radio listings, but stated
that this would require significant development effort. It has not been
ruled out entirely, but is unlikely to be added soon.

Users who would like to obtain BBC radio listings in XMLTV format are advised
to investigate a new grabber that obtains listings from the BBC Backstage
service. See L<http://wiki.xmltv.org/index.php/BBC_Backstage> for more
information.

=head1 MAILING LIST

You can subscribe to and read the XMLTV users mailing list by visiting
L<http://lists.sourceforge.net/lists/listinfo/xmltv-users>. This is a source
of help and advice for new users. A searchable archive of the list is
available at L<http://news.gmane.org/gmane.comp.tv.xmltv.general>.

=head1 SEE ALSO

L<xmltv(5)>, L<http://wiki.xmltv.org>, L<http://www.radiotimes.com/>

=head1 BUGS

If you encounter a reproducible bug, please report it on the XMLTV bug
tracker at L<http://sourceforge.net/tracker/?group_id=39046&atid=424135>,
making sure you assign the bug to the tv_grab_uk_rt category. Please check
that the bug has not already been reported.

The source data on the Radio Times website is generated daily before 0600.
Occasionally the source data may not get recreated, leaving
the source files for some (or all) channels empty. Users are encouraged
to wait at least 1 day before reporting an issue with missing listings,
as they frequently reappear in the next update or later the same day. If listings continue to
be missing from the Radio Times website, please report the fact on the XMLTV users
mailing list.

There have been several occasions in the past when the Radio Times channel index has been
missing from the Radio Times website. This file is essential to being able to
run the grabber, as it contains the list of channels having available listings
data. If this file is missing or empty, and there is no locally-cached copy of
the file, it will not be possible to run the grabber. The file usually
regenerates automatically over the course of the next day, at which point it
will be possible to run the grabber successfully.

There are no other reported ongoing issues.

=head1 AUTHOR

Since 2007 the maintainer has been Nick Morrott (knowledgejunkie at gmail dot com).

The original author was Ed Avis (ed at membled dot com). Parts of this code
were copied from tv_grab_se_swedb by Mattias Holmlund, and from the XMLTV
wiki L<http://wiki.xmltv.org/>. Regional postcode information was kindly
made available from L<http://www.ukfree.tv>.

=cut
xmltv-util 0.5.67-0.1 / usr / bin / tv_grab_uk_rt