#!/usr/bin/perl -w 
# Russian grabber v 0.4.8
#
# Changelog: 
# 0.4.8 - changes in the processdate routine
# 0.4.7 - misc fixes
# 0.4.6 - Fixed output encoding parameter failing to work in CP1251 locale
# 0.4.5 - Default output time zone = UTC, fixed a bug that made december appear as february and another bug that caused double charset conversion with unzipped local files 
# 0.4.4 - set the source timezone to EET, implemented --output option, updated channel_ids, fixed non-recodible quotes
# 0.4.3 - use only ListTV channel names to recognize channels. Preparation for 0.5
# 0.4 - icons support
# 0.3 - autorecognition of local data format (zip, dir, txt)
# 0.2 - first working version
# 0.1 - first running version

=pod

=head1 NAME

tv_grab_ru_sat - Grab TV listings for Russian channels.

=head1 SYNOPSIS

tv_grab_ru_sat --help

tv_grab_ru_sat [--config-file FILE] --configure [--gui OPTION] [--local FILE or DIR]

tv_grab_ru_sat [--config-file FILE] [--output FILE] [--days N]
           [--offset N] [--quiet] [--local FILE or DIR] [--encoding CODEPAGE]

tv_grab_ru_sat --list-channels [--local FILE or DIR] [--encoding CODEPAGE]

=head1 DESCRIPTION

Output TV listings for Russian channels. The grabber can use any ListTV or
TVagent data files. It will download data kindly supplied by http://www.teleweb.ru
(maintained by Pavel Vlasov). If for any reason you cannot access the Internet 
download site, you can still use any ListTV files locally either zipped or unzipped 
to disk. The best time to run this script is probably on Sunday evening
or Monday morning.

First run B<tv_grab_ru_sat --configure> to choose which channels you want
to download. Then running B<tv_grab_ru_sat> with no arguments will output
listings in XML format to standard output.

B<--configure> Prompt for which channels,
and write the configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_ru_sat.conf>.  This is the file written by
B<--configure> and read when grabbing.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of XMLTV::ProgressBar.

B<--output FILE> write to FILE rather than standard output.

B<--days N> grab N days.  The default is 7. This has little meaning because 
the data seems to be refreshed once a week on Sundays and always covers 7 days. 
Any choice will either remove some information or have no effect.

B<--offset N> start N days in the future.  The default is to start
from today.

B<--quiet> suppress the progress messages normally written to standard
error.

B<--local FILE or DIR> read data from a local file or directory instead of downloading.
Accepts path to zip archive, ListTV text file or a directory with ListTV txt files. 

B<--encoding CODEPAGE> output XML using CODEPAGE. Default is Unicode UTF-8 unless
your locale is ru_RU. In Russian environment the system default encoding is used. 

B<--timezone TZ> Convert all programme times to the specified timezone. If this
option is absent, times are written in UTC (the default since version 0.4.5);
the source data is assumed to be in EET.
Time zones should be specified according to RFC 822:

    Universal:  GMT, UT
    Abbreviations :  EET, MSK, MSD, BT, ZP4, ZP5 etc. 
    Military :  A to Z (except J)
    Other    :  +HHMM or -HHMM
    ISO 8601 :  +HH:MM, +HH, -HH:MM, -HH

B<--help> print a help message and exit.

=head1 SEE ALSO

L<xmltv(5)>.

=head1 AUTHOR

Vyacheslav Dikonov, sdiconov@mail.ru

=head1 BUGS

=cut

######################################################################
# Initialisation

use strict;

use utf8;
use POSIX;
use Encode;
use Memoize; # 2x speedup for the parser
memoize('ParseDate');
memoize('processdate');
memoize('Date_ConvTZ');
memoize('Date_Cmp');

use XMLTV;
use XMLTV::Get_nice;
# Options
use Getopt::Long;
use XMLTV::Ask;
use XMLTV::Mode;
use XMLTV::Config_file;
# get_channels
use XMLTV::ProgressBar;
use Date::Manip;
use XMLTV::Date;
use File::Spec::Functions;

# Read ZIP
use Carp;
use Compress::Zlib;
use File::Basename;
use File::Copy;
use File::Find;
use File::Path;
use File::Spec;
use IO::File;
use IO::Seekable;
use Time::Local;
use Archive::Zip;
    
use XMLTV::Usage <<END
$0: get Russian television listings in XMLTV format
To configure: $0 --configure [--config-file FILE] [--local FILE or DIR]
To grab listings: $0 [--config-file FILE] [--output FILE] [--days N] [--offset N] [--quiet] 
                [--local FILE or DIR] [--encoding CODEPAGE] [--timezone TZ]
To list channels: $0 --list-channels [--local FILE or DIR] [--encoding CODEPAGE]
END
  ;

# Sanitise the environment: drop BASH_ENV and pin PATH to the system directories.
delete $ENV{BASH_ENV};
$ENV{PATH} = '/bin:/usr/bin';

######################################################################
# get options

# Options with a default value.
my $opt_days   = 7;
my $opt_offset = 0;
my $opt_quiet  = 0;

# Options that stay undefined unless given on the command line.
my ($opt_help, $opt_output, $opt_configure, $opt_config_file, $opt_gui,
    $opt_list_channels, $opt_local_path, $opt_local_dir,
    $opt_output_encoding, $opt_output_timezone);

GetOptions(
    'config-file=s' => \$opt_config_file,
    'configure'     => \$opt_configure,
    'days=i'        => \$opt_days,
    'encoding=s'    => \$opt_output_encoding,
    'gui:s'         => \$opt_gui,
    'help'          => \$opt_help,
    'list-channels' => \$opt_list_channels,
    'local=s'       => \$opt_local_path,
    'offset=i'      => \$opt_offset,
    'output=s'      => \$opt_output,
    'quiet'         => \$opt_quiet,
    'timezone=s'    => \$opt_output_timezone,
) or usage(0);

if (defined $opt_days and $opt_days < 0) {
    die 'number of days must not be negative';
}
usage(1) if $opt_help;

XMLTV::Ask::init($opt_gui);

# 'grab' is the default; --configure and --list-channels select the other modes.
my $mode = XMLTV::Mode::mode(
    'grab',
    $opt_configure     => 'configure',
    $opt_list_channels => 'list-channels',
);


######################################################################
# Set global variables

my $url_icons = "http://www.free-x.de/teleweb/icons";
my $url_datafile = "http://www.free-x.de/teleweb/tvlist.zip";
my $input_encoding = "windows-1251";
my $input_timezone = "EET"; # winter. Eastern Europe, USSR 1 (Kiev, Kaliningrad)
#my $input_timezone = "EEST"; # summer. Eastern Europe, USSR 1 (Kiev, Kaliningrad)

# File that stores which channels to download.
# NOTE(review): the POD documents the default as ~/.xmltv/tv_grab_ru_sat.conf, but
# the grabber name passed here is 'tv_grab_ru' - confirm which one is intended.
my $config_file = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_ru', $opt_quiet);
my $SHARE_DIR = "/usr/share/xmltv/tv_grab_ru";
my $id_file = "channel_ids"; # file of id aliases and names
$XMLTV::Get_nice::Delay = 0; # No need to be nice.
Date_Init("DateFormat=non-US", "Internal=1");

my %writer_args; # XMLTV::Writer arguments (XML encoding and output)
my %pr_data; # ID - @list of programmes (start, name)
my %ch_data; # ID - Name hashlist of channels
my @id_alias; # ID aliases and names for channel matching
my $new_ch_counter = 0; #count new channels for which no id can be found

# Output timezone: UTC unless --timezone was given
if ( not $opt_output_timezone ) { $opt_output_timezone = "UTC" };

# Detect locale and set output encoding if possible.
# A locale string such as "ru_RU.KOI8-R" is split into language, country and encoding.
my $syslocale = setlocale(LC_CTYPE);
$syslocale = "" if not defined $syslocale; # setlocale() returns undef on failure
my $sysencoding;
my $syslanguage;
my $syscountry;

if ( $syslocale =~ m/\./ ) {
    ($sysencoding = $syslocale) =~ s/.*\.//;
    $syslocale =~ s/\..*//;
};

if ( $syslocale =~ m/_/ ) {
    ($syslanguage = $syslocale) =~ s/_.*//;
    ($syscountry = $syslocale) =~ s/.*_//;
} else {
    $syslanguage = $syslocale
};

# An explicit --encoding always wins.  Without it, a Cyrillic system encoding is
# reused and everything else falls back to UTF-8.  (Previously the UTF-8 fallback
# also overwrote an explicit --encoding whenever the locale was not Russian.)
if (not defined $opt_output_encoding) {
    if (defined $sysencoding and $sysencoding =~ m/(1251|koi8|koi-8|iso8859-5|866)/i ) {
        $opt_output_encoding = $sysencoding;
    } else {
        $opt_output_encoding = "UTF-8";
    };
};
if ($opt_output_encoding eq "CP1251") {$opt_output_encoding = "windows-1251"}; # Fix for perl frontends

# Define XMLTV::Writer parameters
if ($mode eq 'list-channels' or $mode eq 'grab') { # only writing modes need this
    $writer_args{encoding} = $opt_output_encoding; # sets the encoding field in the XML header
    if (defined $opt_output) { # if the --output option is present
        # Separate mode argument: the file name is never parsed for mode characters.
        $writer_args{OUTPUT} = IO::File->new($opt_output, 'w'); # create the output file
        die "cannot write to $opt_output: $!" if not defined $writer_args{OUTPUT};
    };
};

######################################################################
# Configure

if ($mode eq 'configure') {

    XMLTV::Config_file::check_no_overwrite($config_file);

    get_data();
    # unless run in grab mode this routine only retrieves channel names (hacky speedup in parsetxt)

    # write configuration
    open(my $conf, '>', $config_file) or die "cannot write to $config_file: $!";

    # Ask about each channel.  The prompt strings go through encodeoutstr() so
    # that they display correctly on the console.
    my @chs = sort keys %ch_data;
    my @names = map { encodeoutstr("$ch_data{$_}") } @chs;
    my @want = ask_many_boolean(1, @names);
    foreach my $id (@chs) {
        my $w = shift @want;
        if (not defined $w) {
            warn("cannot read input, stopping channel questions");
            last;
        };
        print {$conf} '#' if not $w;
        # Encode the original (character) name exactly once; the prompt string
        # above may already be encoded and must not be encoded a second time.
        print {$conf} encode($opt_output_encoding, "channel $id $ch_data{$id}\n");
    };

    close $conf or warn "cannot close $config_file: $!";
    say("Finished configuration.");

    exit();
}

######################################################################
# List channels
elsif ($mode eq 'list-channels') {

    get_data();
    # unless run in grab mode this routine only retrieves channel names (hacky speedup in parsetxt)

    my $writer = XMLTV::Writer->new(%writer_args);
    $writer->start({ 'source-info-url' => "http://www.teleweb.ru",
                     'source-data-url' => $url_datafile,
                     'generator-info-name' => "XMLTV",
                     'generator-info-url' => "http://membled.com/work/apps/xmltv/" });

    foreach my $id (keys(%ch_data)) {
        $writer->write_channel({id => $id, 'display-name' => [ [ encode($opt_output_encoding, $ch_data{$id}), 'ru' ] ]});
    };
    $writer->end();

    exit();
}

######################################################################
# Grab

elsif ($mode eq 'grab') {

    my @config_lines = XMLTV::Config_file::read_lines($config_file);
    my %channels;

    # Read configuration
    my $line_num = 0; # incremented before use, so the first line is reported as 1
    foreach (@config_lines) {
        ++ $line_num;
        next if not defined;
        if (/^channel:?\s+(\S+)\s+([^\#]+)/) {
            my ($ch_id, $ch_name) = ($1, $2);
            $ch_name =~ s/\s*$//;
            $channels{$ch_id} = $ch_name;
        }
        else {
            warn "$config_file:$line_num: bad line\n";
        }
    };

    die "No channels specified, run me with --configure\n" if not keys %channels;

    # Set time limits.  The offset is applied exactly once (it used to be added
    # twice, so "--offset N" started 2N days in the future).
    my $timefiltstart = DateCalc(parse_date('now'), "$opt_offset days");
    my $timefiltend = DateCalc($timefiltstart, "+ $opt_days days");

    # Process and output
    get_data(\%channels);
    # we give reference to the selected channels list for speedup

    # Declared unconditionally; "my $x = ... if COND" has undefined behaviour.
    my $bar;
    $bar = XMLTV::ProgressBar->new("Writing data", 101) if not $opt_quiet;
    my $count = 0; # count cycles
    my $steps = 2 * keys %channels; # every channel is visited twice (channels, then programmes)

    my $writer = XMLTV::Writer->new(%writer_args); # Header and channels
    $writer->start({ 'source-info-url' => "http://www.teleweb.ru",
                     'source-data-url' => $url_datafile,
                     'generator-info-name' => "XMLTV",
                     'generator-info-url' => "http://membled.com/work/apps/xmltv/" });

    foreach my $ch_id (keys %channels) { # Write channels
        $count ++; # count cycles for bar update
        if ( exists($ch_data{$ch_id}) ) { # Only wanted channels
            $writer->write_channel({'id' => $ch_id,
                                    'display-name' => [ [ encode($opt_output_encoding, $ch_data{$ch_id}), 'ru' ] ],
                                    'icon' => [ { 'src' => $url_icons."/".$ch_id.".png" } ] });
        };
        $bar->update(100/($steps/$count)) if not $opt_quiet;
    };

    foreach my $ch_id (sort (keys %channels)) { # Write progs
        $count ++; # count cycles for bar update
        if ( exists($ch_data{$ch_id}) ) { # Only wanted channels
            foreach my $pr_start (sort (keys %{$pr_data{$ch_id}}) ) {
                my $rec = $pr_data{$ch_id}{$pr_start};

                my $futureflag = Date_Cmp($pr_start, $timefiltend);
                # Include currently broadcasted programme, if its stop time is known
                my $pastflag = defined $rec->{'stop'}
                    ? Date_Cmp($rec->{'stop'}, $timefiltstart)
                    : Date_Cmp($pr_start, $timefiltstart);

                # Only programmes falling into the specified timegap
                next if $pastflag < 0 or $futureflag > 0;

                my %prog = (
                    'channel' => $ch_id,
                    'start'   => $pr_start,
                    'title'   => [ [ encode($opt_output_encoding, $rec->{'title'}), 'ru' ] ],
                );
                $prog{'stop'} = $rec->{'stop'} if defined $rec->{'stop'};
                if ( defined $rec->{'desc'} ) {
                    $prog{'desc'} = [ [ encode($opt_output_encoding, $rec->{'desc'}), 'ru' ] ];
                };
                if ( defined $rec->{'category'} ) {
                    $prog{'category'} = [ [ encode($opt_output_encoding, $rec->{'category'}), 'ru' ] ];
                };
                $writer->write_programme(\%prog);
            };
        };
        $bar->update(100/($steps/$count)) if not $opt_quiet;
    };

    $writer->end();
    if (not $opt_quiet) {
        $bar->update(101);
        $bar->finish();
    };

    exit();
}
else { die "unsupported mode \"$mode\"\n" };

# END

######################################################
# Service routines

# Return a copy of the argument with leading and trailing whitespace removed
# and with typographic characters that do not recode well (m-dash, ellipsis,
# curly single quotes) replaced by plain equivalents.
# An undefined argument yields the empty string.
sub cleanupstr ($) {
    my $str = $_[0];

    return "" if not defined $str;

    $str =~ s/^\s+//;
    $str =~ s/\s+$//;

    # Fix some bad symbols that do not recode well.
    # These substitutions used to be applied to $_ only, so the returned string
    # was never fixed.  They now apply to the returned string; the caller's $_ is
    # still fixed as well, because parsetxt calls this sub in void context on
    # each input line and relies on that side effect.
    foreach my $text (\$str, (defined $_ ? \$_ : ())) {
        ${$text} =~ s/—/-/g;   # m-dash
        ${$text} =~ s/…/.../g; # ellipsis
        ${$text} =~ s/‘/\"/g;  # Left quote
        ${$text} =~ s/’/\"/g;  # Right quote
    };

    return $str;
};

# Reduce a name to a canonical key so that alternative spellings of the same
# name compare equal: whitespace and the punctuation characters
# " ' - ( ) . , ? ! & $ are removed and the result is lower-cased.
sub cookstr($) {
    my ($cooked) = @_;

    $cooked =~ s/[\$\s"'().,?!&-]//g;

    return lc($cooked);
};

# Prepare a string for display to the user: returned unchanged when --gui was
# given or no system encoding was detected, otherwise encoded to the system
# encoding for console output.
sub encodeoutstr($) {
    my ($text) = @_;

    return $text if defined $opt_gui or not defined $sysencoding;
    return encode($sysencoding, $text);
};

# Turn a free-form Russian date string into something Date::Manip can parse.
# Returns the converted string, or "" when the argument cannot be a date.
sub processdate ($) { # DateString
    my $str = $_[0];

    # A date string cannot be shorter than 5 symbols and must contain a digit.
    # It rejects obvious garbage, such as the "tv.all" signature of teleweb files.
    if (not defined $str or length($str) < 5 or $str !~ m/.*[0-9].*/) {
        return "";
    };

    # Russian day and month names with their English abbreviations.
    # Each pattern is applied once, case-insensitively, in this order.
    my @translations = (
        [ "(понедельник|пон|пн)"   => "Mon" ],
        [ "(вторник|втр|вт)"       => "Tue" ],
        [ "(среда|срд|ср)"         => "Wed" ],
        [ "(четверг|чтв|чт)"       => "Thu" ],
        [ "(пятница|птн|пт)"       => "Fri" ],
        [ "(суббота|сбт|сб)"       => "Sat" ],
        [ "(воскресенье|вск|вс)"   => "Sun" ],

        [ "(января|январь|янв)"    => "Jan" ],
        [ "(февраля|февраль|фев)"  => "Feb" ],
        [ "(марта|март|мар)"       => "Mar" ],
        [ "(апреля|апрель|апр)"    => "Apr" ],
        [ "(мая|май)"              => "May" ],
        [ "(июня|июнь|июн)"        => "Jun" ],
        [ "(июля|июль|июл)"        => "Jul" ],
        [ "(августа|август|авг)"   => "Aug" ],
        [ "(сентября|сентябрь|сен)" => "Sep" ],
        [ "(октября|октябрь|окт)"  => "Oct" ],
        [ "(ноября|ноябрь|ноя)"    => "Nov" ],
        [ "(декабря|декабрь|дек)"  => "Dec" ],
    );

    foreach my $pair (@translations) {
        my ($native, $english) = @{$pair};
        $str =~ s/$native/$english/i;
    };

    my $garbage = "(го|е|ое|г|г.|-го|-е|-ое)";
    $str =~ s/$garbage//i; # Remove possible Russian suffixes
    $str =~ s/(\.|\,)/ /;  # Mon. 4, april...

    return cleanupstr($str);
};

# Try to guess the type of a programme from its title.
# Every pattern of the table is tried (case-insensitively, in sorted key order);
# the categories of all matching patterns are joined with " / ".
# Returns undef when nothing matches.
sub setcategory ($) {
    my $pr_name = $_[0];

    my %cat = ( '(Х\/ф|Худ\.фильм|Художественный фильм|Комедия|Мелодрама|Триллер|Боевик|Ужасов|Ужастик|Кинофильм|Телефильм)' => 'Художественный фильм',
        '(Т\/с|Телесериал|^Сериал| Сериал)' => 'Телесериал',
        '(М\/с|Мультсериал|Мультипликационный сериал)' => 'Мультипликационный сериал',
        '(М\/ф|Мультфильм|Мультипликационный фильм)' => 'Мультфильм',
        '(Д\/с|Док\.сериал|Документальный сериал)' => 'Документальный сериал',
        '(Д\/ф|Док\.фильм|Документальный фильм|Кинофильм|Телефильм)' => 'Документальный фильм',
        '(Фильм-спектакль|Спектакль)' => 'Спектакль',
        'Концерт' => 'Концерт',
        'Комедия' => 'Комедия',
        'Мелодрама' => 'Мелодрама',
        'Триллер' => 'Триллер',
        'Боевик' => 'Боевик',
        '(Ужасов|Ужастик)' => 'Фильм ужасов',
        '(Новости|Вести|Время)' => 'Новости'
#       'Шоу'
    );

    my @matching = grep { $pr_name =~ m/$_/i } sort keys %cat;
    my @found = map { $cat{$_} } @matching;

    return @found ? join(" / ", @found) : undef;
};

# Move the pending programme record(s) into the listing of the current day.
#   $data      - hash ref of the day listing, keyed by converted start time
#   $prog      - hash ref of pending records, keyed by "date time" strings
#   $new_year  - ref to a flag: add one year to the parsed date
#   $late_prog - ref to a flag: add one day to the parsed date
# A record such as "8:00,9:00,11:00 News" arrives as several keys of $prog and
# therefore becomes several programmes.
sub commitprog ($$$$) {

    my ($data, $prog, $new_year, $late_prog) = @_;

    foreach my $when (keys %{$prog}) {

        my $pr_start = ParseDate($when);
        $pr_start = DateCalc($pr_start, "+ 1 year") if ${$new_year};  # Happy new year!
        $pr_start = DateCalc($pr_start, "+ 1 day")  if ${$late_prog}; # late prog
        if ($opt_output_timezone) { # Set correct timezone
            $pr_start = Date_ConvTZ($pr_start, $input_timezone, $opt_output_timezone)
                        ." ".$opt_output_timezone;
        };

        # Put a programme into the channel listing
        my $entry = $prog->{$when};
        $data->{$pr_start}{'title'}    = $entry->{'title'};
        $data->{$pr_start}{'desc'}     = $entry->{'desc'};
        $data->{$pr_start}{'category'} = setcategory($entry->{'title'});
    };
};

# Copy the programmes of one day into the global programme structure %pr_data.
#   $ch_id  - channel id the programmes belong to
#   $day_pr - hash ref of the day listing, keyed by start time
# Start times are processed in sorted order.  A programme is assumed to stop
# when the next one starts, so only the last programme of the day is left
# without a stop time.
sub commitday ($$) {

    my ($ch_id, $day_pr) = @_;
    my @times = sort keys %{$day_pr};

    foreach my $i (0 .. $#times) {
        my $start = $times[$i];
        my $src = $day_pr->{$start};

        $pr_data{$ch_id}{$start}{'title'} = $src->{'title'};
        if ( defined $src->{'desc'} ) {
            $pr_data{$ch_id}{$start}{'desc'} = $src->{'desc'};
        };
        if ( defined $src->{'category'} ) {
            $pr_data{$ch_id}{$start}{'category'} = $src->{'category'};
        };

        # Only now we can set fake stop times.  $#times is the LAST index, so
        # every entry before it has a successor (the old test "$#times - 1"
        # also skipped the second-to-last programme).
        if ($i < $#times) {
            $pr_data{$ch_id}{$start}{'stop'} = $times[$i + 1];
        };
    };
}


# Process contents of a ListTV txt file provided as an array of strings
# results are stored in the global data structure
# Parse one ListTV listing.
#   $file     - name of the listing, used in messages only
#   $txt      - array ref with the decoded lines of the listing
#   $channels - optional hash ref of requested channel ids; parsing of a
#               listing for any other channel stops as soon as its id is known
# On success the channel name is recorded in %ch_data; in grab mode the
# programmes are stored through commitprog/commitday.  Outside grab mode only
# the channel name is extracted.
sub parsetxt ($$$) { # filename for ID, array

    my ($file, $txt, $channels) = @_ ;
    my (@dayheader, %day_pr, %current_pr, $datestr);
    my ($ch_id, $ch_name);
    my ($pr_name, $pr_start, $pr_date);
    my $new_year = 0;
    my ($pr_hour, $pr_latesthour, $pr_late) = (0, 0, 0); #hour tracking for date shift detection

    # The block returns 1 on success; a die inside it leaves $parsed_ok undefined.
    my $parsed_ok = eval {
        # read the whole list
        foreach (@{$txt}) {

            # Replace, in place, symbols that do not recode well.  This is done
            # explicitly here instead of through a void-context cleanupstr() call.
            s/—/-/g;   # m-dash
            s/…/.../g; # ellipsis
            s/‘/\"/g;  # Left quote
            s/’/\"/g;  # Right quote

            if ( m/^[0-9]+:[0-9][0-9]/ ) { # Programmes

                if ( $mode eq 'grab' ) { # Only grab mode requires this data

                    commitprog(\%day_pr, \%current_pr, \$new_year, \$pr_late); #New record found, save already formed programme
                    undef %current_pr;

                    if ( defined $pr_date ) {
                        $_ =~ s/,\s(?=[0-9]+:[0-9][0-9])/,/g; # 8:00, 9:00 -> 8:00,9:00
                        ($pr_start = $_) =~ s/ .*//;
                        ($pr_name = $_) =~ s/\Q$pr_start\E//; # \Q: the time field is literal text, not a pattern
                        $pr_name = cleanupstr($pr_name);

                        # Remember highest hour value to detect late night programmes
                        ($pr_hour = $pr_start) =~ s/.*\s//; $pr_hour =~ s/:.*//; $pr_hour =~ s/^0//;
                        if ($pr_latesthour <= $pr_hour) {$pr_latesthour = $pr_hour} else {$pr_late = 1};

                        if ($pr_start =~ m/,/) { # 8:00,9:00,12:00 News
                            my @tmp = split (/,/, $pr_start);
                            foreach (@tmp) { ${$current_pr{$pr_date." ".$_}}{'title'} = $pr_name };
                        } else { # 8:00 Good Morning
                            ${$current_pr{$pr_date." ".$pr_start}}{'title'} = $pr_name;
                        };

                    } else {
                        # No date header seen yet, so the channel id may still be unknown.
                        my $who = defined $ch_id ? $ch_id : $file;
                        say "Warning: Broken file format for channel \"".$who."\"! No date info found for programmes \"".encodeoutstr($_)."\"";
                    };
                };

            } elsif ( m/./ ) { # All other non-empty lines come here

                @dayheader = split(/\./, "$_"); #Dots are separators FIXME (This is lame and teleweb bound)
                $datestr = processdate($dayheader[0]); #Help Date::Manip to parse date
                $dayheader[1] = cleanupstr($dayheader[1]);

                if ( $datestr ) { # Here come valid date strings

                    #New day started, save already formed programme
                    if ( $mode eq 'grab' ) { # Only grab mode requires this data
                        commitprog(\%day_pr, \%current_pr, \$new_year, \$pr_late);
                        undef %current_pr;

                        commitday($ch_id, \%day_pr); # Set global data
                        undef %day_pr;
                    };

                    if ( not defined $ch_name ) { # Get channel name
                        if ($dayheader[1]) { $ch_name = $dayheader[1] }; # Defence against empty channel names
                        getchannelid(\$ch_name, \$ch_id); # Set channel ID
                        if ($mode ne 'grab') { last }; # Only the grab mode requires slow full parsing. Jump to next file
                    };

                    getchannelid(\$ch_name, \$ch_id); # Set channel ID

                    # Speedup. No need to read unrequested channels.
                    # "return 1" leaves the eval successfully, so the channel
                    # name is still recorded below.
                    if ( defined $channels and not exists $channels->{$ch_id} ) { return 1 };

                    # We do not know the year, so ParseDate assumes that it is the current year.
                    # Ancient yesteryear files will be misinterpreted
                    # We also hope that dates in the txt file will follow each other chronologically
                    if ( defined $pr_date and not $new_year ) { # not the first date string in file
                        if ( Date_Cmp(ParseDate($pr_date), ParseDate($datestr)) > 0 ) {
                            $new_year = 1; # This flag tells that we should add one year to the date
                        };
                    };

                    $pr_date = $datestr; # Set current date
                    $pr_hour = 0; $pr_latesthour = 0; $pr_late = 0; #Reset hour tracker

                } else { # Remarks
                    $datestr = $_;
                    foreach (keys %current_pr) { # Append description lines.
                        ${$current_pr{$_}}{'desc'} .= $datestr;
                    }; # Strings not related to any prog (such as format markers) are dropped here
                       # because %current_pr will have no keys.
                };
            };
        };
        if ( not defined $ch_name ) { die "Unable to parse file \"".$file."\". No channel name found!" };

        # Set global data

        if ( $mode eq 'grab' ) { # Only grab mode requires this data
            commitprog(\%day_pr, \%current_pr, \$new_year, \$pr_late);
            undef %current_pr;

            commitday($ch_id, \%day_pr); # Last day of the listing
            undef %day_pr;
        };

        1;
    };

    if ( $parsed_ok ) {
        $ch_data{$ch_id} = $ch_name; # Build list of channel names
    } else {
        say "Error while parsing \"".$file."\"!\n";
    };
};

# Read a text file and append its lines to an array.
#   $file - path of the file to read
#   $txt  - array ref that receives the lines
#   $enc  - encoding of the file; every line is decoded from it
# Line ends (including carriage returns) are stripped.  No line is filtered
# out here.  If the file cannot be opened a message is printed and the array
# is left untouched.
sub readtxt ($$$) {
    my ($file, $txt, $enc) = @_;

    # Three-argument open with a lexical handle: the file name is taken
    # literally and no global handle or $_ is clobbered.
    my $fh;
    if (not open($fh, '<', $file)) {
        say "unable to read \"".$file."\"\n";
        return;
    };

    while (my $line = <$fh>) {
        chomp $line;
        $line =~ s/\x0d//g; # Windows linefeed
        push (@{$txt}, decode($enc, $line));
    };
    close ($fh);
};

# Look a channel up in the alias list (@id_alias, lines of the form
# "id:name|name|...") and set both referenced variables.
#   $ch_name - ref to the channel name; replaced by the first name variant of
#              the matching alias line
#   $ch_id   - ref to the channel id; set from the matching alias line
# Names are compared in their cookstr() form.  When the id is still undefined
# after the lookup, a dummy id "new_channel_N" is assigned.
sub getchannelid ($$) {
    my ($ch_name, $ch_id) = @_;
    my $wanted = cookstr(${$ch_name});

    foreach my $entry (@id_alias) { # parse list of ids and names
        next unless $entry =~ m/./ and $entry !~ m/^#/; # skip comments and empty lines

        my @fields = split (/\:/, $entry);
        if (@fields != 2) {
            say "bad alias file line \"".encodeoutstr($entry)."\"\n";
            next;
        };

        my @variants = split (/\|/, $fields[1]);
        foreach (@variants) { # iterates on $_, exactly like the original loop
            next unless $wanted eq cookstr($_);
            ${$ch_id} = cleanupstr($fields[0]);
            # Use first variant from the name list (to unify channel names from different grabs and sources)
            ${$ch_name} = cleanupstr($variants[0]);
            last;
        };
    };

    if (not defined ${$ch_id}) { # No ID found in the list - create a dummy one and count new channels
        #if ( $mode eq 'grab' ) { say "Unknown channel \"".encodeoutstr(${$ch_name})."\". Using dummy id \"new_channel_".$new_ch_counter."\"." };
        ${$ch_id} = "new_channel_".$new_ch_counter;
        $new_ch_counter++;
    };
};

# Unzip
# Parse every ".txt" member of a zip archive with parsetxt().
#   $zip_name - path of the archive
#   $channels - optional list of requested channels (used in grab mode),
#               passed through to parsetxt()
# Returns 1 when the file could be read as a zip archive, 0 otherwise.
sub unzipdata ($$) {
    my ($zip_name, $channels) = @_;

    my $zip = Archive::Zip->new();
    # read() returns 0 on success; anything else means "not a readable zip"
    return 0 if $zip->read( $zip_name ) != 0;

    my @zipmembers = $zip->memberNames();

    # Declared unconditionally; "my $x = ... if COND" has undefined behaviour.
    my $bar;
    $bar = XMLTV::ProgressBar->new("Parsing data", 101) if not $opt_quiet;
    my $count = 0; # count cycles

    foreach my $file (@zipmembers) {
        $count ++; # count cycles for bar update
        next unless $file =~ m/\.txt$/i; # Only txt files (same test as the directory reader)

        my @txt = split("\n", decode($input_encoding, $zip->contents($file)) );
        s/\x0d//g foreach @txt; # Remove DOS linefeeds in Linux
        parsetxt($file, \@txt, $channels);
        $bar->update(100/(@zipmembers/$count)) if not $opt_quiet;
    };

    if (not $opt_quiet) {
        $bar->update(101);
        $bar->finish();
    };
    return 1;
};

# Autodetect data type (zip/txt/dir) and process it
# Autodetect the kind of local data and parse it.
#   $localpath - a zip archive, a single ListTV text file, or a directory
#                containing ".txt" listings
#   $channels  - optional list of requested channels, passed through
# A plain file is tried as a zip archive first and read as text if that fails.
# A path that is neither a file nor a directory is reported and skipped.
sub processlocaldata ($$) {

    my ($localpath, $channels) = @_; # Optional list of requested channels

    if ( -f $localpath ) {
        if ( not unzipdata($localpath, $channels) ) {
            my @txt;
            readtxt($localpath, \@txt, $input_encoding);
            say "parsing data";
            parsetxt($localpath, \@txt, $channels);
        };
    } elsif ( -d $localpath ) {

        my $dir;
        if (not opendir($dir, $localpath)) { # List directory
            say "unable to read directory \"".$localpath."\"\n";
            return;
        };
        my @lst = readdir $dir;
        closedir $dir;

        # Declared unconditionally; "my $x = ... if COND" has undefined behaviour.
        my $bar;
        $bar = XMLTV::ProgressBar->new("Parsing data", 101) if not $opt_quiet;
        my $count = 0; # count cycles

        foreach my $file (@lst) { # Parse the TVAgent/ListTV files of the directory
            $count ++; # count cycles for bar update
            my $path = catfile ($localpath, $file); # Make full path
            next unless no_upwards($file) and -f $path;

            if ($file =~ m/\.txt$/i) { # Only txt files
                my @txt;
                readtxt($path, \@txt, $input_encoding);
                parsetxt($path, \@txt, $channels);
                $bar->update(100/(@lst/$count)) if not $opt_quiet;
            } else { say "skipping file with unknown extension \"".$file."\"."};
        };

        if (not $opt_quiet) {
            $bar->update(101);
            $bar->finish();
        };
    } else {
        say "unable to find local data \"".$localpath."\"\n";
    };
};

# Download / read
# Load the channel alias list, then obtain and parse the listings.
#   $channels - optional list of requested channels, passed through
# The alias file is looked for in the current directory first and in
# $SHARE_DIR second; the sub dies when it is found in neither place.
# Listings come from --local when given, otherwise from $url_datafile, which is
# downloaded into a temporary file.
sub get_data {

    my ($channels) = @_; # Optional list of requested channels

    # Read list of channel names and IDs into memory
    if ( -f $id_file ) {
        readtxt($id_file, \@id_alias, 'UTF-8');
    } elsif ( -f catfile($SHARE_DIR, $id_file) ) {
        readtxt(catfile($SHARE_DIR, $id_file), \@id_alias, 'UTF-8');
    } else {
        die "Unable to find channel ID file \"$id_file\".";
    };

    if ( defined $opt_local_path ) {
        processlocaldata($opt_local_path, $channels);
        return;
    };

    my $data = get_nice($url_datafile);
    die "could not get datafile $url_datafile, aborting\n" if not defined $data;

    # Temp file creation.  File::Temp replaces POSIX::tmpnam(), which is no
    # longer available in current perls; the file is created exclusively and is
    # removed automatically when $tmp goes out of scope.
    require File::Temp;
    my $tmp = File::Temp->new(SUFFIX => '.zip');
    my $file = $tmp->filename;

    binmode($tmp);
    print {$tmp} $data
        or die "unable to write temporary file \"".$file."\", aborting";
    close($tmp)
        or die "unable to write temporary file \"".$file."\", aborting";
    undef $data;

    processlocaldata($file, $channels);
};

