#!/usr/bin/perl
#
# TkTTS, a GUI frontend for text to speech on systems using nautilus file manager (gnome2 people).
# Put it in your ~/.gnome2/nautilus-scripts/ directory and chmod +x.
# Select an arbitrary number of pdf, djvu, epub, ps, or txt files in the nautilus file manager and
# right click one of them. Go to 'Scripts' and click 'TkTTS.pl'. The Tk GUI will pop-up. right click
# on the file you wish to listen to. A sub-menu pops up. 
# Click the title of the file to read it.
# Click the '..' to close the sub-menu.
# Click 'Read *ALL* docs' to read everything (in order shown).
# etc, etc.
#

use strict;
use warnings;
use Encode;
use charnames':short';
use Clipboard;
use Tk;
use Tk::Menu;
#use Tk::DialogBox; # in Tk::
use Tk::Pane; # in Tk::
#use Tk::Carp qw/tkwarn tkdie/; #for debug
#$Tk::Carp::ImmediateWarnings = "true"; #for debug
use List::Util qw( reduce );
use Parallel::ForkManager;
# Shared fork manager used to launch the viewer, TTS and editor helper
# processes; at most 4 children run at once.
# Written as a direct class-method call: the indirect-object form
# ("new Class (...)") is ambiguous to the parser and discouraged.
my $manager = Parallel::ForkManager->new( 4 );

#CONFIG################################################################
# User-tunable settings. The external programs named here are run through the
# shell with the file path appended, so they must be on $PATH.
my $norefs = 0; # 1 = run filterreferences() to crudely strip reference sections; default 0 (off).
my $wordreplace = 0; # 1 = apply the %replaceDBscience pronunciation fixes; default 0 (off).
my $defaultdocviewer = 'evince'; # viewer used for pdf, djvu and ps; alternatives: xpdf, okular?
my $epubdocviewer = "fbreader"; # viewer used for epub/zip (also assigned for txt); alternative: calibre
my $htmldocviewer = "true"; # viewer used for html; "true" means open nothing. opera? firefox? iceweasel? chromium?
my $editor = "gedit"; # editor launched by 'Edit Temp'; mousepad, vim, emacs, etc
my $tts = 'festival --tts'; # TTS command line; the text file path is appended as the last argument
#my $tts = "swift -f"; # cepstral swift
#my $tts = "flite -f"; # festival lite
#my $tts = "espeak -ven+f4 -p 70 -f"; # espeak is installed in lots of distros
my $pdftotext = 'pdftotext'; # pdf -> text converter
my $djvutotext = 'djvutxt'; # djvu -> text converter
my $pstotext = 'ps2ascii'; # postscript -> text converter
# my $epub2txt = 'epub2text'; # not needed, I built in functionality to TkTTS.
my $epub2txt = 'ebook-convert'; # epub -> text converter; called as "<cmd> <epub> <tempfile>"
my $html2text = 'html2text'; # html -> text converter
#my $html2text = 'html2text -width 140';
my $tempfilepath = '/tmp/tts_temp.txt'; # text handed to the TTS engine. Changing the directory is fine, but keep the
                                        # file name: the saved-copy code paths hard-code "tts_temp.txt".
my $epubtempdir = '/tmp/epub2text'; # scratch dir for epub2text(); careful when changing it, epub2text() does `rm -rf` on it.
my $homedir = '~/.tktts/'; # where 'Save' keeps per-document copies; the leading ~ is expanded by tildapathtohome().
my $webpage = '/home/superkuh/www/islisteningto.html'; # HTML file updatereadinglist() appends a line to
my $makewebpage = 1; # 1 = append to $webpage and show the 'Webpage' menu toggle. The author's advice is to leave
                     # this at 0 unless you are him. NOTE(review): it is set to 1 here -- confirm before shipping.
my $titlerepeatremoval = 0; # 1 = run removetitleandchapterheaders(): drop lines of 10+ chars once they have repeated too often


# Replacements that are ALWAYS applied (see doreplacements and clipread):
# typographic punctuation and ligatures the TTS engine cannot pronounce.
# Each key is interpolated into a regex by acronym_convert(), so a key must be
# a valid pattern; each value is the literal replacement text.
my %replaceDB = (
	"\x{2026}" => '...',  # U+2026 HORIZONTAL ELLIPSIS
	"\x{201C}" => "\"",   # U+201C LEFT DOUBLE QUOTATION MARK
	"\x{201D}" => "\"",   # U+201D RIGHT DOUBLE QUOTATION MARK
	"\x{2018}" => "\'",   # U+2018 LEFT SINGLE QUOTATION MARK
	"\x{2019}" => "\'",   # U+2019 RIGHT SINGLE QUOTATION MARK
	"\x{02BC}" => "\'",   # U+02BC MODIFIER LETTER APOSTROPHE
	"\x{07F4}" => "\'",   # U+07F4 NKO HIGH TONE APOSTROPHE
	"\x{FF07}" => "\'",   # U+FF07 FULLWIDTH APOSTROPHE
	"\x{02EE}" => "\"",   # U+02EE MODIFIER LETTER DOUBLE APOSTROPHE
	"\x{201A}" => "\'",   # U+201A SINGLE LOW-9 QUOTATION MARK
	"\x{201E}" => "\"",   # U+201E DOUBLE LOW-9 QUOTATION MARK
	"\x{2032}" => "\'",   # U+2032 PRIME
	"\x{2033}" => "\"",   # U+2033 DOUBLE PRIME
	# Small o with diaeresis is U+00F6 (decimal 246). The previous key,
	# "\x{246}", is hexadecimal and named U+0246 instead, so it never matched.
	"\x{F6}" => "o",
	# NOTE(review): the \x{C2xx} keys below look like two-byte UTF-8 sequences
	# (C2 80, C2 81, ...) written as single code points. As written they name
	# code points in the U+C2xx range, which decoded text is unlikely to
	# contain -- confirm the intended characters before relying on them.
	"\x{C280}" => "ff",   # intended: ff ligature substitute
	"\x{C281}" => "ffi",  # intended: ffi ligature substitute
	"\x{FB00}" => "ff",   # U+FB00 LATIN SMALL LIGATURE FF
	"\x{FB01}" => "fi",   # U+FB01 LATIN SMALL LIGATURE FI
	"\x{FB02}" => "fl",   # U+FB02 LATIN SMALL LIGATURE FL
	"\x{FB03}" => "ffi",  # U+FB03 LATIN SMALL LIGATURE FFI
	"\x{FB04}" => "ffl",  # U+FB04 LATIN SMALL LIGATURE FFL
	"\x{C2AA}" => "ff",   # intended: feminine ordinal indicator standing in for ff
	"\x{C2BA}" => "fl",   # intended: masculine ordinal indicator standing in for fl
	"\x{C282}" => ' ',
	"\x{C283}" => ' ',
	"\x{C284}" => ' ',
	"\x{C285}" => ' ',
	"\x{C286}" => ' ',
	"\x{C287}" => ' ',    # might be a ^, as in 1x10^4
	"\x{C288}" => ' ',
	"\x{C2AE}" => "fi",   # intended: registered sign standing in for fi
	"\x{2014}" => '-',    # U+2014 EM DASH
	" fi-" => " beta-",   # presumably a mis-extracted beta glyph -- confirm
);
# Optional replacements (menu item 'TTS Replace' / $wordreplace): scientific
# units, symbols and notation that a TTS engine cannot pronounce as written.
#
# Every key is interpolated into a regex by acronym_convert(), so regex
# metacharacters that are meant literally must be escaped in the key
# (hence 'Ca2\+', ' g\)' and the escaped dots in the generated keys below).
my %replaceDBscience = (
	"LiTaO3" => ' Lithium Tantalate ',
	" keV " => ' kilo electronvolt ',
	" kJ " => " kilojoule ",
	"\x{00B0}" => " degree ",    # DEGREE SIGN
	"\x{00BD}" => " one half ",  # VULGAR FRACTION ONE HALF
	"\x{00D7}" => " times ",     # MULTIPLICATION SIGN
	"\x{02DA}" => " Angstrom ",  # RING ABOVE, sometimes Angstrom is mislabeled as this.
	"\x{03BD}" => " frequency ", # GREEK SMALL LETTER NU
	"\x{2013}" => " - ",         # EN DASH
	"\x{25E6}" => " degree ",    # WHITE BULLET
	"\x{00A7}" => " section ",   # SECTION SIGN
	# NOTE(review): with charnames ':short', "\N{greek:Xxx}" with a capitalised
	# name appears to select the CAPITAL letter; for Mu and Rho that is
	# probably not the glyph these entries were meant to catch -- confirm.
	"\N{greek:Delta}" => ' the change in ',
	"\N{greek:Delta}t" => ' the change in time ',
	"\N{greek:Mu}" => ' micro ',
	"\N{greek:Omega}" => ' omega ',
	"\x{2126}" => ' omega ', # OHM SIGN, if it's electrical, well, crap. but for physics it works.
	"\N{greek:Sigma}" => ' sigma ',
	"\N{greek:Rho}" => ' row ',
	"~" => ' about ',
	"\x{0398}" => ' theta ',
	"\x{03A6}" => ' phi ',
	"\x{03C6}" => ' phi ', # GREEK SMALL LETTER PHI
	"\x{03B1}" => ' alpha ',
	"\x{03B2}" => ' beta ',
	"\x{03B3}" => ' gamma ',
	"\x{03B4}" => ' delta ',
	"\x{03B5}" => ' epsilon ',
	"\x{03B8}" => ' theta ',
	"\x{03BB}" => ' lambda ',
	"\x{039B}" => ' lambda ',
	"\x{03BC}" => ' mu ',
	"\x{03BE}" => ' chai ',
	"\x{03C0}" => ' pi ',
	"\x{03C1}" => ' rho ',
	"\x{03C3}" => ' sigma ',
	"\x{03C4}" => ' tao ', #tao? what, does tau pronounce wrong?
	"\x{03C7}" => ' chai ',
	"\x{03C8}" => ' psi ',
	"\x{03C9}" => ' omega ',
	"\x{00B1}" => ' plus or minus ',
	"\x{2213}" => ' plus or minus ',
	"\x{2202}" => ' partial differential of ',
	"\x{2207}" => ' del ', # NABLA
	"\x{2206}" => ' change in ',
	"\x{2203}" => ' there exists ',
	"\x{2200}" => ' for all ',
	"\x{2208}" => ' element of ',
	"\x{2209}" => ' not an element of ',
	"\x{2212}" => ' minus ',
	"\x{221A}" => ' square root of ',
	"\x{221D}" => ' proportional to ',
	"\x{221E}" => ' infinity ',
	"\x{2227}" => ' and ',
	"\x{2228}" => ' or ',
	"\x{2229}" => ' intersection of ',
	"\x{222A}" => ' union of ',
	"\x{222B}" => ' integral of ',
	"\x{222C}" => ' double integral of ',
	"\x{222D}" => ' triple integral of ',
	"\x{222E}" => ' contour integral of ',
	"\x{222F}" => ' surface integral of ',
	"\x{2230}" => ' volume integral of ',
	"\x{2234}" => ' therefor ',
	"\x{223C}" => ' approximately ',
	"\x{2245}" => ' approximately equal to ',
	"\x{2248}" => ' almost equal to ',
	"\x{2260}" => ' not equal to ',
	"\x{2264}" => ' less than or equal to ',
	"\x{2265}" => ' greater than or equal to ',
	"\x{2297}" => ' cross product ',
	"\x{22A5}" => ' orthoganal to ',
	"\x{22C5}" => ' dot product ',
	"\x{0026}" => ' and ',
	"\x{003C}" => ' less than ',
	"\x{003E}" => ' greater than ',
	' sfu ' => ' solar flux unit ',
	' COO− ' => ' carboxyl ',
	# The '+' is escaped: unescaped it is a regex quantifier, so 'Ca2+' matched
	# "Ca2"/"Ca22" and left the plus sign behind, and ' Na+ ' never matched.
	'Ca2\+' => ' calcium two plus ',
	'Ca2\+ -' => ' calcium two plus ',
	' Ca-ion ' => ' calcium ion ',
	'CaCl2' => ' calcium chloride ',
	' Na\+ ' => ' sodium plus ',
	'Na-ion' => ' sodium ion ',
	'NaCl' => ' sodium chloride ',
#	' K+ ' => ' potassium plus ',  # too often confused in other fields
	'Ba2\+' => ' barium two plus ',
	'Sr2\+' => ' strontium two plus ',
	# I don't know how I can catch all these one and two letter abbreviations with this method.
	# they need a regex like s/\d+\s?($key)/$DB{$key}/g
	' mM ' => ' millimolar ',
	' g ' => ' grams ',
	' g\)' => ' grams ',
	'g/' => 'grams per ',
	'mg/kg' => 'milligram per kilogram ',
	' mg ' => ' milligrams ',
	'mg/' => 'milligrams per ',
	'mg\)' => ' milligrams)',
	'm/s' => ' meters per second ',
	'm/' => 'meters per ',
	' mA ' => ' milliamps ',
	'mA\)' => ' milliamps)',
	'mA/' => ' milliamps per ',
	' ms ' => ' milliseconds ',
	'ms\)' => ' milliseconds)',
	'ms,' => 'milliseconds,',
	'ms/' => ' milliseconds per ',
	' cm ' => ' centimeters ',
	'cm\)' => ' centimeters ',
	'cm/' => ' centimeters per ',
	' km ' => ' kilometers ', # padded like its siblings; the bare word used to fuse with its neighbours
	' mm ' => ' millimeters ',
	'mm\)' => ' millimeters)',
	'mm/' => ' millimeters per ',
	' nm ' => ' nanometers ',
	'nm\)' => ' nanometers) ',
	'nm/' => ' nanometers per ',
	'V/m' => ' volts per meter ',
	'mV' => ' millivolts ',
	'kHz' => ' kilohertz ',
	'MHz' => ' megahertz ',
	'GHz' => ' gigahertz ',
	'THz' => ' terahertz ',
	'Hz' => ' hertz ',
	' Pa ' => ' pascals ',
	'MPa' => ' mega pascals ',
	"\N{greek:Mu}F" => ' micro farads ',
	"\N{greek:Mu}l" => ' micro liters ',
	' ml ' => ' milliliters ',
	'ml\)' => ' milliliters) ',
	'ml/' => ' milliliters per ',
	' m2' => ' meters squared ',
	' m3' => ' meters cubed ',
	'cm2' => ' centimeters squared ',
	'cm3' => ' centimeters cubed ',
	'J/' => ' joules per ',
	'MJ ' => 'mega joules ',
	' eV ' => ' electron volts ', # leading space restored so the number before it stays separate
);

######## I think these are too general for the public version of this document. :\
######## But they fix a lot of cases while only breaking a small subset of the
######## actual numbers that might be represented.
# Superscripts are lost in text extraction, so "10^4" arrives as "104".
# Generate " 10N ", " 10N," and " 10N." for N = 1..33. The trailing dot is
# escaped: as a bare regex dot, " 101." also matched " 1015", " 1012", ...
foreach my $exponent (1 .. 33) {
	my $lastdigit = $exponent % 10;
	my $suffix = ($exponent >= 11 && $exponent <= 13) ? 'th'
	           : $lastdigit == 1                      ? 'st'
	           : $lastdigit == 2                      ? 'nd'
	           : $lastdigit == 3                      ? 'rd'
	           :                                        'th';
	my $spoken = " 10 to the $exponent$suffix ";
	$replaceDBscience{" 10$exponent "}    = $spoken;
	$replaceDBscience{" 10$exponent,"}    = $spoken;
	$replaceDBscience{" 10$exponent\\."} = $spoken;
}

# "6ft" -> "6 ft " for every final digit, so the unit is read as its own word.
foreach my $digit (0 .. 9) {
	$replaceDBscience{"${digit}ft"} = "$digit ft ";
}

# Lipid abbreviation expansions, keyed by " ABBREVIATION" (note the leading
# space). Nothing visible in this file passes this table to acronym_convert(),
# so it currently has no effect.
# NOTE(review): several keys are prefixes of others (' PC' vs ' PE', ' DAPC'
# contains no shorter key, but ' PG'/' PS' would also match inside longer
# words such as " PGA") -- check match order before wiring this table in.
my %replaceDBlipids = (
	' DOPG' => 'dioleoyl-phosphatidylglycerol',
	' DHA' => 'docosahexaenoic acid',
	' DPA' => 'docosapentaenoic acid',
	' EPA' => 'eicosapentaenoic acid', #this might be trouble
	' DGDG' => 'digalactosyl diglyceride',
	' PE' => 'phosphatidylethanolamine',
	' PC' => 'phosphatidylcholine',
	' PI' => 'phosphatidylinositol',
	' PS' => 'phosphatidylserine',
	' PG' => 'phosphatidylglycerol',
	' DAG' => 'di-acylglycerol',
	' DAPC' => 'di-arachioyl phosphatidylcholine',
	' DCPC' => 'di-decanoyl phosphatidylcholine',
	' DLPC' => 'di-laureoyl phosphatidylcholine',
	' DMPC' => 'di-myristoyl phosphatidylcholine',
	' DMPE' => 'di-myristoyl phosphatidylethanolamine',
	' DMPG' => 'di-myristoyl phosphatidylglycerol',
	' DOPC' => 'di-oleoyl phosphatidylcholine',
	' DPPC' => 'di-palmitoyl phosphatidylcholine',
	' DPPE' => 'di-palmitoyl phosphatidylethanolamine',	
	' POPC' => 'palmitoyl-oleoyl phosphatidylcholine',
	' SOPC' => 'stearoyl-oleoyl phosphatidylcholine'
);
#ENDCONFIG#############################################################

#DECLARINGSOMESTUFFINABROADCONTEXT#####################################
# File-scoped state shared by the Tk callbacks and helper subs below.
my @filepaths; # paths of the selected documents, in selection order
my %filenamesandfilepaths; # label shown in the GUI => full path of that document
my @procid; # child PIDs that killtts() and the SIGINT handler kill with `kill -9`
my $startandstop; # page flags typed into the 'Set Pages' dialog, e.g. "-f 12 -l 33"; undef until set
my $viewdoc = 1; # 1 = also open the document in a viewer; converttotext() sets it per file type
my $freeze = 0; #0 = allow temp file overwrite; 1 = freeze temp file.
my $usesaved = 0; # 1 = speak/edit the saved copy under $homedir/<name>/ instead of the temp file
my $currentname; # label of the document most recently chosen for reading; undef until then
my $docviewer; # viewer command for the current document; set by filetypecheck()

#NAUTILUSINTERACTION###################################################
@filepaths = getnautliuspaths();

# Nothing came from the file manager: assume a command-line call,
# "TkTTS.pl FILE". Read that one file aloud without starting the GUI, wait
# for the viewer and the TTS engine to finish, then exit.
unless ($filepaths[0]) {
	die "Usage: $0 FILE\n" unless defined $ARGV[0] && length $ARGV[0];
	$filepaths[0] = $ARGV[0];

	my $text = converttotext($filepaths[0]);
	die "No text could be extracted from '$filepaths[0]' (unsupported file type?)\n"
		unless defined $text;
	$text = doreplacements($text);
	$text = '' unless defined $text;

	# The TTS engine reads $tempfilepath, so the converted text has to be
	# written there first (previously it was computed and then discarded).
	# $text is a decoded character string, hence the explicit UTF-8 layer.
	open(my $tmpfh, '>:encoding(UTF-8)', $tempfilepath)
		or die "Cannot write $tempfilepath: $!\n";
	print {$tmpfh} $text;
	close($tmpfh) or die "Cannot finish writing $tempfilepath: $!\n";

	my @commands = ("$docviewer \"$filepaths[0]\"", "$tts \"$tempfilepath\"");
	foreach my $command (@commands) {
		# start() returns the child's PID in the parent and 0 in the child.
		my $pid = $manager->start;
		next if $pid;
		# Child: become the helper program. exec only returns on failure.
		exec($command) or warn "Cannot run '$command': $!\n";
		$manager->finish(1);
	}
	$manager->wait_all_children;
	killtts();
	exit;
}

# Return the paths selected in the file manager, one list element per path.
#
# Caja (MATE) and Nautilus (GNOME) both hand script plugins the selection as a
# newline-separated list in an environment variable; Caja is checked first.
# Returns 0 (a single false element in list context) when neither variable is
# set, which is what the command-line fallback above tests for.
sub getnautliuspaths {
	foreach my $variable (qw(CAJA_SCRIPT_SELECTED_FILE_PATHS NAUTILUS_SCRIPT_SELECTED_FILE_PATHS)) {
		return split(/\n/, $ENV{$variable}) if $ENV{$variable};
	}
	return 0;
}
# Expand a leading "~" or "~user" in a path to the matching home directory:
#   ~/blah     -> $HOME/blah (falling back to $LOGDIR, then the passwd entry
#                 of the effective user)
#   ~bob/blah  -> bob's home directory from the passwd database, then /blah
# Paths that do not start with "~" are returned unchanged.
sub tildapathtohome {
	my ($path) = @_;
	if ($path =~ m{^~([^/]*)}) {
		my $user = $1;
		my $home = $user
			? (getpwnam($user))[7]
			: ($ENV{HOME} || $ENV{LOGDIR} || (getpwuid($>))[7]);
		# Swap the "~user" prefix (tilde plus user name) for the directory.
		substr($path, 0, 1 + length($user), $home);
	}
	return $path;
}

#TK####################################################################
# Build the main window. Only reached in GUI mode: the command-line fallback
# above ends with exit.
#$mw = MainWindow->new();
my $main = MainWindow->new(); 
my $paneheight; # height of the scrolling pane: 22 per listed document

my $numberofdocuments = scalar @filepaths;
if ( $numberofdocuments > 1) {
	# Several documents: show the count in the title, one 22-high row each.
	$main->title("TkTTS - $numberofdocuments docs");
	$paneheight = 22 * $numberofdocuments;
} else {
	# Single document: show its full path in the title.
	$paneheight = 22;
	my $title = $filepaths[0];
	$main->title("TkTTS - $title");
}
$main->minsize(100,22);
#$main->maxsize(600,220);

##TK Pane Scrollbars################################################## # -minsize=>'100,25' ?
# Scrollable container that holds one Label per document.
# NOTE(review): 'ow' presumably means "scrollbar on the west side, shown only when needed" -- confirm against Tk::Scrolled.
my $pane = $main->Scrolled( 'Pane', -scrollbars => 'ow', -height => $paneheight, )->pack( -expand => 1, -fill => 'both' );


##TK::DialogBox########################################################
# Page range dialog, shown by pageflags() ('Set Pages' menu entry). The text
# typed into $entry is stored verbatim in $startandstop and later passed to
# the converter command line.
my $dialog = $main->DialogBox( -title   => "First and last pages, ex: -f 12 -l 33",
                            -buttons => [ "Submit", "Cancel" ],
                           );
$dialog->add("Label", -text => "Flags")->pack( );
my $entry = $dialog->add("Entry", -width => 50)->pack( ); # read back with $entry->get in pageflags()

##TK::MENU RightClick Context##########################################
###bbfu @ http://www.perlmonks.org/?node_id=124095

# Right-click context menu. Entry 0 starts out as a separator; showmenu()
# replaces it on every right click with a command named after the clicked
# document, and that command is what starts reading.
my $menu = $main->Menu(-tearoff => 0);
$menu->add('separator'); # this acts as the read command

# Remaining entries in display order: [ kind, label, callback or variable ].
my @menu_items = (
	[ 'command',     '..',              sub { 1 }            ],
	[ 'command',     'Stop TTS',        \&killtts            ],
	[ 'command',     'Set Pages',       \&pageflags          ],
	[ 'checkbutton', 'Freeze',          \$freeze             ],
	[ 'checkbutton', 'TTS Replace',     \$wordreplace        ],
	[ 'checkbutton', 'Del References',  \$norefs             ],
	[ 'checkbutton', 'Del Repetitive',  \$titlerepeatremoval ],
	[ 'command',     'Save',            \&savestate          ],
	[ 'checkbutton', 'Use Saved',       \$usesaved           ],
	[ 'command',     'Edit Temp',       \&edittemp           ],
	[ 'command',     'Clipboard',       \&clipread           ],
	[ 'command',     'Read *ALL* docs', \&READALLDOCS        ],
);
#$menu->add("Entry", -width => 50)->pack( );
# The 'Webpage' toggle is only offered when the feature is enabled in CONFIG.
push(@menu_items, [ 'checkbutton', 'Webpage', \$makewebpage ]) if $makewebpage;

foreach my $item (@menu_items) {
	my ($kind, $label, $target) = @$item;
	if ($kind eq 'checkbutton') {
		$menu->add('checkbutton', -label => $label, -offvalue => '0', -onvalue => '1', -variable => $target);
	} else {
		$menu->add('command', -label => $label, -command => $target);
	}
}


# One Label per selected document. The label text doubles as the key into
# %filenamesandfilepaths, which is how the menu callbacks find the file again.
foreach my $filepath (@filepaths) {
	# Preferred label: the file name without directory and without its
	# 2-4 character extension ("/a/b/paper.v2.pdf" -> "paper.v2").
	my ($name) = $filepath =~ m{/([^/]*)\.\w{2,4}$};

	# Fallbacks so a label is never empty or undef: the plain file name
	# (relative paths, missing or unusual extensions), then the path itself.
	($name) = $filepath =~ m{([^/]+)/*$} unless defined $name && length $name;
	$name = $filepath unless defined $name && length $name;

	# Two files with the same name in different directories must not share a
	# hash key, or the first one becomes unreachable from the menu.
	my $label = $name;
	my $copy = 1;
	$label = "$name (" . ++$copy . ")" while exists $filenamesandfilepaths{$label};

	$pane->Label(-text => $label)->pack();
	$filenamesandfilepaths{$label} = $filepath;
}

# Right mouse button (<3>) anywhere in the window calls showmenu(), which
# receives the three Ev(...) values as its $x, $y and $widget arguments.
# NOTE(review): presumably Ev('X')/Ev('Y') are root-window coordinates and
# Ev('W') the widget under the pointer -- confirm against the Tk::bind docs.
# When a button is released over a menu, the active entry (if any) is invoked. The menu also unposts unless it is a torn-off menu.
$main->bind('<3>', [\&showmenu, Ev('X'), Ev('Y'), Ev('W')]);
$main->focus();
# Hand control to Tk. Top-level statements further down the file run only
# after MainLoop returns.
MainLoop;

# Right-click handler; reading a document starts from here.
# Arguments (after the widget the binding belongs to): pointer x, pointer y,
# and the widget that was clicked, whose -text is the document label.
# The menu gets a fresh first entry named after that label; the entry that
# occupied slot 0 before (the initial separator, or the previous document's
# entry) is removed once the menu has been posted.
sub showmenu {
	my ($self, $x, $y, $widget) = @_;
	my $label = $widget->cget('-text');

	# What happens when the user picks the document's own entry.
	my $read_clicked = sub {
		$currentname = $label if $label;
		updatereadinglist($filenamesandfilepaths{$label}, $label);
		readtext($label) if $label;
	};

	$menu->insert(1, 'command', -label => $label, -command => $read_clicked);
	$menu->post($x, $y);
	$menu->delete(0,0);
}

# Menu callback 'Read *ALL* docs': convert every selected document, join the
# results and speak them as one text.
#
# Documents are processed in the order they are listed in the window
# (@filepaths); iterating over the hash keys, as before, gave a random order.
# Unless Freeze or Use Saved is active the combined text replaces the temp
# file; the TTS engine is then started on the temp file in a child process.
sub READALLDOCS {
	my %label_for = reverse %filenamesandfilepaths; # full path => GUI label
	my $alldocs = '';

	foreach my $filepath (@filepaths) {
		my $name = $label_for{$filepath};
		next unless defined $name;
		updatereadinglist($filepath, $name);

		my $text = converttotext($filepath);
		next unless defined $text; # unsupported type or failed conversion
		$text = doreplacements($text);
		$alldocs .= $text if defined $text;
	}

	unless ($freeze or $usesaved) { # might break something @2010
		# $alldocs is decoded character data, so encode explicitly; a bare
		# handle would emit Latin-1 or "Wide character" warnings.
		open(my $tmpfh, '>:encoding(UTF-8)', $tempfilepath)
			or die "Cannot write $tempfilepath: $!";
		print {$tmpfh} $alldocs;
		close($tmpfh) or die "Cannot finish writing $tempfilepath: $!";
	}

	my $command = "$tts \"$tempfilepath\"";
	# start() returns the child's PID in the parent and 0 in the child.
	# The PID is not stored in @procid (it never was: the old push came after
	# exec and could not run); 'Stop TTS' stops the engine by name instead.
	my $pid = $manager->start;
	return if $pid;
	# Child: become the TTS engine. exec only returns on failure.
	exec($command) or warn "Cannot run '$command': $!\n";
	$manager->finish(1);
}

# Menu callback 'Set Pages': ask for converter page flags (e.g. "-f 12 -l 33")
# and store them in $startandstop.
# The dialog is shown again after an empty Submit; it ends on a non-empty
# Submit (the flags are also added to the window title) or on any other button.
sub pageflags {
	my $done = 0;
	until ($done) {
		# show the dialog
		my $button = $dialog->Show;

		# anything but Submit abandons the change
		if ($button ne "Submit") {
			print "okay, nevermind about reading only defined pages. No one will see this.\n";
			$done = 1;
			next;
		}

		$startandstop = $entry->get;
		# $filenamesandfilepaths{$name}{'pageflags'} = "$startandstop"; # try to add page flags for each
		unless (defined($startandstop) && length($startandstop)) {
			print "No flags registered! No one will see this, probably.\n";
			next;
		}

		print "Flags: $startandstop\n";
		my $dynamictitle = $main->title;
		$main->title(
			$numberofdocuments > 1
				? "$dynamictitle - pages $startandstop"
				: "pages $startandstop: $dynamictitle"
		);
		$done = 1;
	}
}

# Read one document aloud.
#   $name - GUI label of the document (key into %filenamesandfilepaths)
#
# Normally the document is converted, post-processed and written to the temp
# file first. With Freeze or Use Saved active that step is skipped and the
# existing temp file (or the saved copy under $homedir/<name>/) is spoken.
# The viewer (when $viewdoc is set) and the TTS engine each run in their own
# child process, so the GUI stays responsive.
sub readtext {
	my $name = shift;
	$currentname = $name;

	my $filepath = $filenamesandfilepaths{$name};
	unless (defined $filepath) {
		warn "No document is registered under the label '$name'.\n";
		return;
	}

	if ($freeze or $usesaved) {
		# Conversion is skipped, but the viewer settings must still belong
		# to THIS document rather than to whichever one was converted last
		# (or to none at all on a first read). Mirrors converttotext():
		# plain text gets no viewer, every other known type does.
		my $type = filetypecheck($filepath);
		$viewdoc = ($type eq 'txt' ? 0 : 1) if $type;
	} else {
		my $text = converttotext($filepath);
		unless (defined $text) {
			warn "No text could be extracted from '$filepath'.\n";
			return;
		}
		$text = doreplacements($text);
		$text = '' unless defined $text;

		# Decoded character data: encode explicitly on the way out.
		open(my $tmpfh, '>:encoding(UTF-8)', $tempfilepath)
			or die "Cannot write $tempfilepath: $!";
		print {$tmpfh} $text;
		close($tmpfh) or die "Cannot finish writing $tempfilepath: $!";
	}

	my $speakpath = $usesaved
		? tildapathtohome($homedir) . "$name/tts_temp.txt"
		: $tempfilepath;

	# Viewer first (if wanted and known), then the TTS engine.
	my @commands = ("$tts \"$speakpath\"");
	unshift(@commands, "$docviewer \"$filepath\"") if $viewdoc && defined $docviewer;

	foreach my $command (@commands) {
		# start() returns the child's PID in the parent and 0 in the child.
		# PIDs are not stored in @procid (they never were: the old push came
		# after exec and could not run); 'Stop TTS' kills the engine by name.
		my $pid = $manager->start;
		next if $pid;
		# Child: become the helper program. exec only returns on failure.
		exec($command) or warn "Cannot run '$command': $!\n";
		$manager->finish(1);
	}
}

# Convert a document to text with the external converter for its type.
#   $filepath - path of the document
# Returns the text as a decoded (character) string, or undef when the file
# type is not recognised or nothing could be read.
# Side effects: sets $viewdoc (0 for plain text, 1 otherwise); filetypecheck()
# sets $docviewer; the epub branch writes the raw conversion to $tempfilepath.
# $startandstop (page flags from the 'Set Pages' dialog) is honoured for pdf
# and djvu only, and is never modified here.
sub converttotext {
	my $filepath = shift;
	my $resultingtext;

	my $filetype = filetypecheck($filepath);
	return unless $filetype;

	# Single-quote a word for the shell so that file names containing
	# spaces, quotes, "$" or backticks reach the converter unchanged.
	my $shellquote = sub {
		my $word = shift;
		$word =~ s/'/'\\''/g;
		return "'$word'";
	};
	my $quotedpath = $shellquote->($filepath);
	my $pageflags = defined $startandstop ? $startandstop : '';

	#run system utilities
	if ($filetype eq 'pdf') {
		$viewdoc = 1;
		# pdftotext takes the flags as typed, e.g. "-f 12 -l 33"
		$resultingtext = `$pdftotext -nopgbrk $pageflags $quotedpath -`;
	} elsif ($filetype eq 'djvu') {
		$viewdoc = 1;
		# djvutxt wants -page=FIRST-LAST; translate from the pdftotext-style
		# flags. Both a first and a last page must be given. Multi-digit page
		# numbers are accepted, and the translated form is kept local so the
		# stored flags stay valid for the next call.
		if ($pageflags =~ /-\w\s+(\d+)\s+-\w\s+(\d+)/) {
			my $pagerange = "$1-$2";
			$resultingtext = `$djvutotext -page=$pagerange $quotedpath`;
		} else {
			$resultingtext = `$djvutotext $quotedpath`;
		}
	} elsif ($filetype eq 'txt') {
		$viewdoc = 0;
		if (open(my $in, '<', $filepath)) {
			local $/; # slurp the whole file
			$resultingtext = <$in>;
			close($in);
		} else {
			warn "Can't open $filepath: $!\n";
		}
	} elsif ($filetype eq 'ps') {
		# no page to page options here yet.
		$viewdoc = 1;
		$resultingtext = `$pstotext $quotedpath`;
	} elsif ($filetype eq 'epub') {
		$viewdoc = 1;
		# The converter writes to a file rather than stdout: convert into
		# the temp file, then read that back.
		my $quotedtemp = $shellquote->($tempfilepath);
		`$epub2txt $quotedpath $quotedtemp`;
		open(my $converted, '<', $tempfilepath) or die "Can't open $tempfilepath: $!\n";
		$resultingtext = do { local $/; <$converted> };
		close($converted);
		#$resultingtext = epub2text($filepath,$epubtempdir);
	} elsif ($filetype eq 'html') {
		$viewdoc = 1;
		$resultingtext = `$html2text $quotedpath`;
	}

	return unless defined $resultingtext;
	# Typographic ligatures are dealt with later, by doreplacements().
	return decode("utf8", $resultingtext);
}

# Classify a file by its extension and select the viewer for it.
#   $filepathtocheck - path whose final ".ext" decides the type
# Returns 'pdf', 'djvu', 'txt', 'ps', 'epub' or 'html' and sets $docviewer
# accordingly; returns 0 for an unknown extension and a false value when the
# path has no extension at all ($docviewer is left untouched in both cases).
sub filetypecheck {
	my $filepathtocheck = shift;

	if ($filepathtocheck =~ /.+\.(\w+)$/) {
		my $extension = $1;

		# Checked in this order; the first pattern found anywhere in the
		# extension (case-insensitively) wins.
		my @rules = (
			[ qr/pdf/i,        \$defaultdocviewer, 'pdf'  ],
			[ qr/djvu/i,       \$defaultdocviewer, 'djvu' ],
			[ qr/txt/i,        \$epubdocviewer,    'txt'  ],
			[ qr/ps/i,         \$defaultdocviewer, 'ps'   ],
			[ qr/(epub|zip)/i, \$epubdocviewer,    'epub' ],
			[ qr/(html)/i,     \$htmldocviewer,    'html' ],
		);
		foreach my $rule (@rules) {
			my ($pattern, $viewer, $type) = @$rule;
			next unless $extension =~ $pattern;
			$docviewer = $$viewer;
			return $type;
		}
		return 0;
		# die "The filetype doesn't seem to be pdf, djvu, epub, ascii, or postscript.\n$!";
	}
}

# Post-process converted text before it is spoken.
# The optional filters run first, each only when its menu toggle is on:
# reference stripping, scientific-notation replacements, repeated-header
# removal. The punctuation/ligature table %replaceDB is always applied last.
# Returns the processed text.
sub doreplacements {
	my ($text) = @_;

	$text = filterreferences($text)                    if $norefs;
	$text = acronym_convert($text, \%replaceDBscience) if $wordreplace;
	$text = removetitleandchapterheaders($text)        if $titlerepeatremoval;

	# always replace non-standard apostrophes and quotations that cannot be pronounced.
	return acronym_convert($text, \%replaceDB);
}

# Menu callback 'Stop TTS' (also run at the end of a command-line read):
# stop whatever is currently speaking.
# Kills festival and its audio helpers by name, then the configured TTS
# program if it is something other than festival (previously espeak, flite
# or swift kept talking), then every PID recorded in @procid.
sub killtts {
	$manager->finish;
	close FESTIVAL;

	my @cleanup = (
		'killall festival',
		'killall aplay',
		'killall audsp',
		'kill -9 `pidof festival`',
		'kill -9 `pidof audsp`',
	);

	# First word of $tts is the program name. It is only passed to the shell
	# when it looks like a plain command name or path.
	my ($ttsprogram) = split(' ', $tts);
	if (defined $ttsprogram && $ttsprogram ne 'festival' && $ttsprogram =~ m{^[\w.+/-]+$}) {
		push(@cleanup, "killall $ttsprogram");
	}

	foreach my $command (@cleanup, map { "kill -9 $_" } @procid) {
		`$command`;
	}
}

# Ctrl-C / SIGINT: stop every speech and audio process, then die.
#
# The handler is installed from a BEGIN block so that it is in place before
# the GUI starts. As an ordinary statement at this point in the file it only
# executed after MainLoop had returned, i.e. never while the window was open.
# The closure reads $manager, $tts and @procid when the signal arrives, by
# which time they have been initialised.
BEGIN {
	$SIG{INT} = sub {
		$manager->finish if $manager;
		close FESTIVAL;

		my @cleanup = (
			'killall festival',
			'killall audsp', # was misspelled "kilall", so audsp was never killed here
			'killall aplay',
			'killall paplay',
			'kill -9 `pidof festival`',
			'kill -9 `pidof audsp`',
		);
		foreach my $command (@cleanup, map { "kill -9 $_" } @procid) {
			`$command`;
		}
		die "A horrible, horrible death.\n$!";
	};
}

# Menu callback 'Clipboard': speak the current clipboard contents.
# The text goes to an interactive festival process as a (SayText "...")
# command, so it must be a valid Scheme string literal: backslashes and double
# quotes are escaped first, otherwise a single quotation mark in the clipboard
# ends the string early and the rest is interpreted as festival commands.
# Does nothing when the clipboard is empty.
sub clipread {
	my $cliptext = Clipboard->paste;
	return unless defined $cliptext && length $cliptext;

	$cliptext = acronym_convert($cliptext,\%replaceDB);
	$cliptext =~ s/([\\"])/\\$1/g;

	# FESTIVAL stays a bareword handle because killtts() closes it by name.
	open (FESTIVAL, '|-', 'festival') or die "Cannot talk to festival.\n$!";
	print FESTIVAL "(SayText \"$cliptext\")";
	close FESTIVAL;
}

# Delete repetitive
# Drop running headers (book or paper titles repeated on every page).
#   $texttoedit - text to filter
#   $numdupes   - earlier occurrences tolerated before a line is dropped (default 10)
#   $numchars   - lines shorter than this are never dropped (default 10)
# A line is removed once it has already been seen more than $numdupes times
# and is at least $numchars characters long. Every kept line is re-terminated
# with "\n". Returns undef when no line is kept.
#
# todo: add check for \n\d+\n for page numbers. \n(page|p|p\.)?(\s?)\d+\n
sub removetitleandchapterheaders {
	my ($texttoedit, $numdupes, $numchars) = @_;
	$numdupes ||= 10;
	$numchars ||= 10;

	my %timesseen;
	my $kept;
	foreach my $line (split(/\n/, $texttoedit)) {
		my $seenbefore = $timesseen{$line}++;
		next if $seenbefore > $numdupes && length($line) >= $numchars;
		$kept .= "$line\n";
	}
	return $kept;
}

# Crude reference-section remover.
# A line ending in "references" (optionally followed by one whitespace
# character) starts a skipped region; a line containing chapter, introduction,
# section, abstract or appendix ends it, and that line is kept. Matching is
# case-insensitive. This fails in ~10% of cases but helps for the other 90%.
# Returns the remaining lines, each terminated by "\n", or undef when nothing
# is left.
sub filterreferences {
	my ($texttoedit) = @_;

	my $kept;
	my $skipping = 0;
	for my $line (split(/\n/, $texttoedit)) {
		my $newcontent = $line =~ /(chapter|introduction|section|abstract|appendix)/i;
		if ($newcontent) {
			$skipping = 0;
		}
		elsif ($line =~ /references\s?$/i) { # a heading: nothing follows the word
			$skipping = 1;
		}
		next if $skipping;
		$kept .= "$line\n";
	}

	# Perhaps remove all (.+\d{4}), to remove inline references. But how to be sure?

	return $kept;
}

# Apply every replacement in a table to a text.
#   $texttoedit - text to edit (returned unchanged if undef)
#   $params     - hash ref: regex pattern => literal replacement
# Keys are interpolated as REGEXES, not literal strings, so metacharacters
# must be escaped in the table (e.g. 'ml\)').
# Replacements run longest key first, ties in string order. Hash order is
# random per run, so without sorting a short key could fire inside a longer
# one ('Hz' inside 'kHz', 'mg/' inside 'mg/kg') and the output varied.
# Returns the edited text.
sub acronym_convert {
	my $texttoedit = shift;
	my $params = shift;
	return $texttoedit unless defined $texttoedit && ref($params) eq 'HASH';

	my @patterns = sort { length($b) <=> length($a) or $a cmp $b } keys %$params;
	foreach my $key (@patterns) {
		next unless length $key; # an empty pattern would reuse the last successful match
		my $replacement = $params->{$key};
		$texttoedit =~ s/$key/$replacement/g;
	}
	return $texttoedit;
}

# Menu callback 'Save': keep a per-document copy of the current temp file as
# <homedir>/<document label>/tts_temp.txt, the path readtext() and edittemp()
# look for when 'Use Saved' is on.
# Creates <homedir> and the document directory when missing (plain mkdir is
# not recursive, so the first save used to fail silently). 'Use Saved' is
# switched on only after the copy succeeded; on failure a warning is printed
# and nothing changes.
sub savestate {
	unless (defined $currentname && length $currentname) {
		warn "Save: no document has been read yet, nothing to save.\n";
		return;
	}

	require File::Copy; # core module, loaded on first use

	my $basedir = tildapathtohome($homedir);
	$basedir =~ s{/+$}{} if length($basedir) > 1; # avoid "dir//name"
	my $docdir = "$basedir/$currentname";

	foreach my $dir ($basedir, $docdir) {
		next if -d $dir;
		unless (mkdir $dir) {
			warn "Save: cannot create directory $dir: $!\n";
			return;
		}
	}

	unless (File::Copy::copy($tempfilepath, "$docdir/tts_temp.txt")) {
		warn "Save: cannot copy $tempfilepath to $docdir/tts_temp.txt: $!\n";
		return;
	}
	$usesaved = 1;
}

# Earlier attempt at savestate(). Nothing in this file calls it; the 'Save'
# menu entry is bound to savestate(). Kept for reference only.
# NOTE(review): this looks broken as written and should probably be deleted --
# see the notes on the individual lines.
sub savestate_stupid {
	#$tempfilepath
	$usesaved = 1;
	my $totalsavepath;
#	tkwarn "\nhuh? $currentname\n";
	my $dirname = $currentname;

	# NOTE(review): $homedir still contains the literal "~" at this point, so
	# this existence test presumably never succeeds -- confirm.
	if (-e "$homedir/$dirname") {

#		tkwarn "it exists";
	} else {
		# Overwrites the global $homedir with its expanded form.
		$homedir = tildapathtohome($homedir);
		# NOTE(review): the leading backslash is outside any string, so this
		# appears to take a reference to "$homedir" and concatenate its
		# stringified form, rather than building a quoted path -- confirm.
		$totalsavepath = \"$homedir" . "$dirname\"";
		# NOTE(review): the doubled backslashes make these patterns match a
		# literal backslash followed by "s"/"w", probably not whitespace and
		# word characters as intended -- confirm.
		$totalsavepath =  join( q//, map { s/\\s+/_/g; lc } split /[^\\s\\w]+/, $totalsavepath);
		`mkdir $totalsavepath`;
		`cp \"$tempfilepath\" \"$totalsavepath\"`;

#tkwarn "tempfilepath: \"$tempfilepath\" ,\n\n homedir $homedir\n dirname: \n$dirname\n\nfinal: $totalsavepath"

		#if ($homedir =~ /(\/^)/) (	
		#	tkwarn "chop: $1, \\n";
		#	$homedir =~ s/\/^//;
		#	tkwarn " $homedir";
		#}
	}

	
}

# Menu callback 'Edit Temp': open the text that will be spoken in $editor.
# Turns Freeze on first, so the next read does not overwrite the edits.
# With 'Use Saved' on (and a document already chosen) the saved copy
# <homedir>/<label>/tts_temp.txt is edited, otherwise the temp file.
# The editor runs in a child process; the GUI does not wait for it.
sub edittemp {
	# When edit temp is used in TkTTS set 'Freeze' enabled.
	$freeze = 1;

	# The saved path is only built when it is needed and a document label
	# exists; building it unconditionally warned about an undefined
	# $currentname whenever nothing had been read yet.
	my $target = ($usesaved && defined $currentname)
		? tildapathtohome($homedir) . "$currentname/tts_temp.txt"
		: $tempfilepath;
	my $command = "$editor \"$target\"";

	# start() returns the child's PID in the parent and 0 in the child. The
	# editor's PID is deliberately not put in @procid: 'Stop TTS' sends
	# kill -9 to everything in that list and would destroy unsaved edits.
	my $pid = $manager->start;
	return if $pid;
	# Child: become the editor. exec only returns on failure.
	exec($command) or warn "Cannot run '$command': $!\n";
	$manager->finish(1);
}

# Today's date in GMT, formatted like "Tue Mar 05, 2024": English weekday and
# month abbreviations, two-digit day of month, four-digit year.
sub getdate {
	my @monthnames = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
	my @daynames = qw(Sun Mon Tue Wed Thu Fri Sat Sun);

	# gmtime fields 3..6: day of month, month (0-11), years since 1900, weekday (0 = Sunday)
	my ($dayofmonth, $monthindex, $yearoffset, $weekday) = (gmtime())[3 .. 6];

	return sprintf('%s %s %02u, %d',
		$daynames[$weekday], $monthnames[$monthindex], $dayofmonth, 1900 + $yearoffset);
}

# Built-in epub -> text conversion: unzip the book, read the chapter order
# from the .ncx table of contents, concatenate the chapter files and run the
# result through $html2text.
#   $filename - path of the .epub file
#   $tmpdir   - scratch directory; it is DELETED (rm -rf) before and after use
# Returns the extracted text.
# The only call to this sub in this file is commented out (converttotext uses
# $epub2txt instead), so it is currently unused.
# NOTE(review): $tmpdir is interpolated unquoted into several shell commands,
# including rm -rf; a value containing spaces or shell metacharacters would be
# dangerous -- confirm callers only ever pass $epubtempdir.
sub epub2text {
	my ($filename,$tmpdir) = @_;
	my $text;
	my $OEBPSdir;

	# Start from an empty scratch directory holding a copy of the book.
	`rm -rf $tmpdir`;
	mkdir($tmpdir, 0777) || die "dir creation $tmpdir failed: $!";
	#copy("$filename","$tmpdir/epub.zip") or die "Copy failed: $!";
	system "cp", "$filename", "$tmpdir/epub.zip";
	`unzip $tmpdir/epub.zip -d $tmpdir`;
	# The content may sit in an OEBPS/ or OPS/ subdirectory, or at top level.
	if (-d "$tmpdir/OEBPS") {	
		$OEBPSdir = "$tmpdir/OEBPS";
	} elsif (-d "$tmpdir/OPS") {	
		$OEBPSdir = "$tmpdir/OPS";
	}
	
	# Locate the .ncx table of contents: top level first, then the content dir.
	# NOTE(review): <...> here is a glob in scalar context, which keeps its
	# position between calls; a second call to this sub may therefore see a
	# different (or no) result -- confirm before reviving this code.
	# NOTE(review): $OEBPSdir is undef when neither subdirectory exists.
	my $tocfile;
	my $ncxfile = <$tmpdir/*.ncx>; 
	my $ncxfile_OEBPSdir = <$OEBPSdir/*.ncx>;
	if ($ncxfile) {
		$tocfile = $ncxfile;
	} else {
		$tocfile = $ncxfile_OEBPSdir;
	}

	# No XML parser: the chapter file names are pulled out of the
	# <content src="..."> attributes with a regex, line by line.
	my @files;
	open(TOC, "<$tocfile") or warn "Can't open file:$tocfile\n$!";
	while (my $line = <TOC>) {
		# NOTE(review): the match result is not checked, so on a line without
		# a src attribute $1 may still hold an earlier capture -- confirm.
		$line =~ m#<content src=\"(.+)\"#;
		my $filename = $1;
		$filename =~ s/#(.+)$// if $filename; # strip the "#anchor" part
		push(@files, $filename) if $filename;
	}


	# reusing something from my nowplaying.pl music stats script.
	my @unique;
	@files = removesubsequentdupes(@files);
	# Collapse runs of identical consecutive entries to one (several TOC
	# entries often point into the same chapter file). Although written
	# inside epub2text, a named sub like this is a package-level sub.
	sub removesubsequentdupes {
		my @played = @_;
		my @unique;
		my $previoustrack = "";
		foreach my $track (@played) {
			if ($track eq $previoustrack) {
				next;
			} else {
				push(@unique, $track);
			}
			$previoustrack = $track;
		}
		return @unique;
	}

	# Concatenate the chapter files, in TOC order, into one HTML file.
	foreach my $filename (@files) {
		$filename =~ s/OEBPS\///;
		#print "file: $filename\n";

		if ($OEBPSdir) {
			`cat \"$OEBPSdir/$filename\" >> $tmpdir/epub.html`;
		} else {
			`cat \"$tmpdir/$filename\" >> $tmpdir/epub.html`;
		}
	}

	# Convert the combined HTML to text and read it back, dropping any
	# leftover <?xml ...> declarations.
	`$html2text $tmpdir/epub.html > $tmpdir/epub.txt`;
	open(TXT, "<$tmpdir/epub.txt") or die "cannot open \$tmpdir/epub.txt\n$!";
	while (<TXT>) { s/<\?xml.+>//; $text .= $_; }

	# Clean up the scratch directory. Neither TOC nor TXT is closed explicitly.
	$tmpdir =~ s#/OEBPS##;
	`rm -rf $tmpdir`;
	return $text;
}

# Append an "is listening to" entry for a document to the author's web page.
#   $webfilename - full path of the document (rewritten to a site-relative URL)
#   $shortname   - label to show as the link text
# Does nothing unless $makewebpage is on or when either argument is missing.
# The feature is personal to the author; failing to open $webpage is reported
# with a warning and otherwise ignored, so reading still proceeds.
sub updatereadinglist {
	return unless $makewebpage;
	# no one cares but me. you don't need this.
	my $webfilename = shift;
	my $shortname = shift;
	return unless defined $webfilename && defined $shortname;

	$webfilename =~ s#/home/superkuh/library/#/library/#;
	#$webfilename =~ s#/home/superkuh/Library/#/users/superkuh/Library/#;
	#$webfilename = '/users/superkuh/Library/' . $shortname;

	my $date = getdate();

	# File names may contain &, <, > or quotes; escape them so one odd name
	# cannot break the markup of the whole page.
	foreach my $field ($webfilename, $shortname) {
		$field =~ s/&/&amp;/g;
		$field =~ s/</&lt;/g;
		$field =~ s/>/&gt;/g;
		$field =~ s/"/&quot;/g;
	}

	my $website;
	unless (open($website, '>>', $webpage)) {
		warn "Cannot append to $webpage: $!\n";
		return;
	}
	print {$website} "<li>$date :: <a href=\"$webfilename\">$shortname</a></li>\n";
	close($website) or warn "Cannot finish writing $webpage: $!\n";
	return;
}