From 2d424062b75e96488e34eadd831bb9f5b6523ebb Mon Sep 17 00:00:00 2001 From: ThorstenB Date: Sat, 13 Oct 2012 14:42:22 +0200 Subject: [PATCH] Original ATIS voice generation scripts by John Denker Also thanks to Dave Luff for digging this up from his (t)rusty hard disk. --- scripts/atis/atis-lex.pl | 80 +++++++++ scripts/atis/find_nonUTF8.pl | 21 +++ scripts/atis/list-airports.pl | 150 ++++++++++++++++ scripts/atis/synth.pl | 314 +++++++++++++++++++++++++++++++++ scripts/atis/voice.pl | 102 +++++++++++ scripts/atis/words_per_line.sh | 43 +++++ 6 files changed, 710 insertions(+) create mode 100755 scripts/atis/atis-lex.pl create mode 100755 scripts/atis/find_nonUTF8.pl create mode 100755 scripts/atis/list-airports.pl create mode 100755 scripts/atis/synth.pl create mode 100755 scripts/atis/voice.pl create mode 100755 scripts/atis/words_per_line.sh diff --git a/scripts/atis/atis-lex.pl b/scripts/atis/atis-lex.pl new file mode 100755 index 000000000..583080e13 --- /dev/null +++ b/scripts/atis/atis-lex.pl @@ -0,0 +1,80 @@ +#! /usr/bin/perl -w + +sub usage { + print <<\EoF; +Read the atis_lexicon.hxx file and print +the vocabulary words ... plus phonetic digits and letters. + +See also list-airports.pl + +Typical usage: + (echo "/" + FG_ROOT=/games/$whatever/fgd ATIS_ONLY=yes ./list-airports.pl + FG_ROOT=/games/$whatever/fgd ./atis-lex.pl) > $whatever.vlist +EoF +} + +use strict; +use Symbol; + + my $fgroot = $ENV{'FG_ROOT'} || '.'; + +main: { + if (@ARGV) { + usage; + exit; + } + my $mapfn = "$fgroot/../fgs/src/ATCDCL/atis_lexicon.hxx"; + my $mapch = Symbol::gensym; + if (!open($mapch, '<', $mapfn)) { + print STDERR "Could not open abbreviation file '$mapfn'\n"; + print STDERR "Maybe you need to set FG_ROOT\n"; + exit(1); + } + while (my $line = <$mapch>) { + chomp $line; + if ($line =~ s/^[ \t]*Q[(]//) { + $line =~ s/[)][ \t]*$//; + print "$line\n"; + } + } + print <\n"; + +while( chomp($content = ) ) { + $length = length($content); + + for( $i = 0; $i < $length; $i++ ) { + + if( ord(substr($content, $i, 1)) > 127 ) + { + print "$content\n"; + last; + } + } +} +close(FILE); + +exit 0 diff --git a/scripts/atis/list-airports.pl b/scripts/atis/list-airports.pl new file mode 100755 index 000000000..a431fca68 --- /dev/null +++ b/scripts/atis/list-airports.pl @@ -0,0 +1,150 @@ +#! /usr/bin/perl -w + +sub usage { + print < atis.list +EoF +} + +use strict; +use Symbol; +my $noparen = 1; + + my $verbose = 0; + my $apt_name = ''; + my $lat; + my $lon; + my $atis; + my $country = ''; + my $elev; + my $tower; + my $bldgs; + my $apt_id; + my $shapefile; + my $namer = 'NAME'; + my $skipping = 0; + my $tot_apts = 0; + + my %states = (); + my %short_country = (); + + + my $fgroot = $ENV{'FG_ROOT'} || '.'; + my $atis_only = $ENV{'ATIS_ONLY'} || 0; + +sub process_apt { + if ($atis_only && ! $atis) { + return; + } + my $str .= $apt_name; + + $str =~ s' *$''; ## remove trailing spaces + if ($noparen) { + $str =~ s/[(][^)]*[)]?//g; + } + print "$str\n"; + $tot_apts++; +} + +my %remap = (); + +sub get_remap { + +# Note: in this context, GKI probably stands for Gereja Kristen Indonesia +# I guess the church builds lots of airports. + + my $mapfn = "$fgroot/../fgs/src/ATCDCL/atis_remap.hxx"; + my $mapch = Symbol::gensym; + if (!open($mapch, '<', $mapfn)) { + print STDERR "Could not open abbreviation file '$mapfn'\n"; + print STDERR "Maybe you need to set FG_ROOT\n"; + exit(1); + } + while (my $line = <$mapch>) { + chomp $line; + if ($line =~ s/[ \t]*REMAP[(]//) { + $line =~ s/[)][ \t]*$//; + my @stuff = split(',', $line, 2); + my $from = $stuff[0]; + my $to = $stuff[1]; + $to =~ s/^[ \t]*//; + if ($to eq 'NIL') { + $to = ''; + } + $remap{$from} = $to; + } + } +} + +main: { + + get_remap; + + my $delim = '-'; + my $incmd = 'zcat /games/sport/fgd/Airports/apt.dat.gz'; + my $inch = Symbol::gensym; + open ($inch, '-|', $incmd) + || die "Couldn't open pipe from '$incmd'\n"; + + + my $junk = <$inch>; + $junk = <$inch>; + liner: while (my $line = <$inch>) { + chomp $line; + my @stuff = split(' ', $line); + my $type = shift @stuff || 0; +### print "..$type ... $line ...\n"; + + if ($type == 1) { +## Here if new airport. +## +## First, print results of previous work, i.e. airport +## stanzas already seen ... since the apt.dat file +## doesn't have a clear way of signaling the end of a stanza. + if ($apt_name) { + process_apt(); + } + $apt_name = ''; + $atis = ''; + $lat = 0; + $lon = 0; + $country = ''; + + $elev = shift @stuff; + $tower = shift @stuff; + $bldgs = shift @stuff; + $apt_id = shift @stuff; + my $name = join $delim, @stuff; + + for my $from (keys %remap) { + my $to = $remap{$from}; + $name =~ s/\b$from\b/$to/gi; + } + +## option for plain words, not hyphenated phrases + if (1) { + $name =~ s/$delim/ /g; + } + + $apt_name = "$name"; + } + + if ($type == 10) { + $lat = $stuff[0]; + $lon = $stuff[1]; + } + + if ($type == 50) { + $atis = join(' ', @stuff); + } + } + process_apt(); ## flush out the very last one + print STDERR "Total airports: $tot_apts\n"; +} diff --git a/scripts/atis/synth.pl b/scripts/atis/synth.pl new file mode 100755 index 000000000..1cc0d2223 --- /dev/null +++ b/scripts/atis/synth.pl @@ -0,0 +1,314 @@ +#! /usr/bin/perl -w + +use strict; +use Symbol; +use Audio::Wav; + +sub usage { + print < 'windh', + 'romeo' => 'Rome E O', + 'xray' => 'X ray', +); + +main: { + + my $skip = 0; + my $oneword = 0; + my $gripe = 0; + my $out_bits_sample = 8; ## this is what FGFS expects + my @plain_args = (); + argx: while (@ARGV) { + my $arg = shift @ARGV; + $arg =~ s/^--/-/; + if ($arg eq '-help' || $arg eq '-h') { + usage; + exit(0); + } + if ($arg eq '-dump') { + my $ifn = shift @ARGV || 'foo.wav'; + my $wav = new Audio::Wav; + print "About to open '$ifn' ...\n"; + my $waver = $wav -> read( $ifn ); + print "xxxxxxxxxxxxxxxx\n"; + for my $detail (keys %{$waver->details()}) { + printf("%-20s %s\n", $detail, ${$waver->details()}{$detail}); + } + exit(0); + } + if ($arg eq '-skip') { + $skip++; + next argx; + } + if ($arg eq '-gripe') { + $gripe++; + next argx; + } + if ($arg eq '-one' || $arg eq '-1') { + $oneword++; + next argx; + } + if ($arg =~ '^-') { + die "Unrecognized option '$arg'\n"; + } + push @plain_args, $arg; + } + + my $nargs = @plain_args; + if ($nargs != 3) { + die "Wrong number of arguments ($nargs); for help try:\n $0 -help\n"; + } + my @todo = (); + my ($ifn, $indexfn, $out_wav) = @plain_args; + + my $inch = Symbol::gensym; + open ($inch, '<', $ifn) + || die "Couldn't open input file '$ifn'\n"; + +# Skip some lines from the input list, as requested: + for (my $ii = 0; $ii < $skip; $ii++) { + my $ignore = <$inch>; + } + +# Read the rest of the input file: + while (my $line = <$inch>) { + chomp($line); + if ($oneword) { + my @stuff = split(' ', $line, 2); + doit($stuff[0]); + } else { + for my $word (split(' ', $line)) { + doit($word); + } + } + } + close $inch; + +# Optionally print a list of things that the input file +# requested more than once. + if ($gripe) { + foreach my $thing (sort keys %count) { + if ($count{$thing} > 1) { + printf("%4d %s\n", $count{$thing}, $list{$thing}); + } + } + } + my $nsnip = (keys %list); + + if (0 && $nsnip > 10) { + $nsnip = 10; + } + print STDERR "nsnip: $nsnip\n"; + + my $index = Symbol::gensym; + open ($index, '>', $indexfn) + || die "Couldn't write index file '$indexfn'\n"; + + print $index "$nsnip\n"; + if (! -d 'snip') { + mkdir('snip') + || die "Could not create directory 'snip' : $!\n"; + } + +############## system "/bin/cp nothing.wav t1.wav"; + my $where = 0; + my $sample_rate = -1; + my $channels = -1; + my $bits_sample = -1; + my $ii = 0; + snipper: for my $thing (sort keys %list) { + $ii++; + my $iix = sprintf('%05d', $ii); + my $xfn = "./snip/x$iix"; + + my $fraise = lc($thing); + if (exists $fixup{$fraise}) { + #xxxx print "fixing $fraise\n"; + $fraise = $fixup{$fraise}; + } + +## This turns dashes and other funny stuff into spaces +## in the phrase to be processed: + $fraise =~ s%[^a-z']+% %gi; + if ($thing eq '/' || $thing eq '/_') { + system("/bin/cp quiet0.500.wav $xfn.wav"); + } else { + my $script = $proto; + $script =~ s/XXX/$fraise/; + $script =~ s|YYY|$xfn.wav|; + #xxxx print "$fraise ... $script\n"; + + my $cmd = '/usr/bin/festival'; + my $pipe = Symbol::gensym; + open ($pipe, '|-', $cmd) + || die "Couldn't open pipe to '$cmd'\n"; + print $pipe $script; + close $pipe; + if ($? != 0){ + print STDERR "Error in festival script: '$script'\n"; + next snipper; + } + } + + my $wav = new Audio::Wav; + my $waver = $wav -> read("$xfn.wav"); + if ($sample_rate < 0) { + $sample_rate = ${$waver->details()}{'sample_rate'}; + $channels = ${$waver->details()}{'channels'}; + $bits_sample = ${$waver->details()}{'bits_sample'}; + } else { + $sample_rate == ${$waver->details()}{'sample_rate'} + && $channels == ${$waver->details()}{'channels'} + && $bits_sample == ${$waver->details()}{'bits_sample'} + || die "audio format not the same: $xfn.wav"; + } + + my $statcmd = "2>&1 sox $xfn.wav -n stat"; + my $stat = Symbol::gensym; + open ($stat, '-|', $statcmd) + || die "Couldn't open pipe from '$statcmd'\n"; + my $vol = 0; + my $size = 0; + + my $lastline; + while (my $line = <$stat>) { + chomp $line; + $lastline = $line; + my @stuff = split ':', $line; + my $nw = @stuff; + #### print STDERR "$nw +++ $line\n"; + if ($nw == 2) { + if ($stuff[0] eq 'Volume adjustment') { + $vol = 0+$stuff[1]; + } + elsif ($stuff[0] eq 'Samples read') { + $size = 0+$stuff[1]; + } + } + } + my $status = close $stat; + if ($?) { + print STDERR "Stat command failed: $statcmd\n" . ": $lastline \n"; + next snipper; + } + if ($size == 0) { + print STDERR "?Warning! Zero-size audio file for $iix '$thing'\n"; + } + printf("%s %6.3f %6d '%s'\n", $iix, $vol, $size, $thing); + my $subsize = int($size/2); + printf $index ("%-45s %10d %10d\n", $thing, $where, $subsize); + $where += $subsize; + + + my $volume_cmd = sprintf("sox -v %6.3f %s.wav %s.raw", + $vol*0.9, $xfn, $xfn); + ########## print "+ $volume_cmd\n"; + if (1) { + my $vol_handle = Symbol::gensym; + open ($vol_handle, '|-', $volume_cmd) + || die "Couldn't open pipe to command '$volume_cmd'\n"; + + close $vol_handle; + if ($?) { + die "Volume command failed: $statcmd\n" . ": $lastline"; + } + } + push @todo, "$xfn.raw"; + } + close $index; + + my $cat_cmd = "cat " . join(' ', @todo) . " > ./snip/everything.raw"; + my $cat_handle = Symbol::gensym; + open ($cat_handle, '|-', $cat_cmd) + || die "Couldn't open pipe to command '$cat_cmd'\n"; + close $cat_handle; + if ($?) { + die "Cat command failed: $cat_cmd"; + } + + my $wav_cmd = "sox --rate $sample_rate --bits $bits_sample" + . " --encoding signed-integer" + . " ./snip/everything.raw --rate 8000 --bits $out_bits_sample $out_wav"; + + my $wav_handle = Symbol::gensym; + open ($wav_handle, '|-', $wav_cmd) + || die "Couldn't open pipe to command '$wav_cmd'\n"; + close $wav_handle; + if ($?) { + die ".wav command failed: $wav_cmd"; + } +} diff --git a/scripts/atis/voice.pl b/scripts/atis/voice.pl new file mode 100755 index 000000000..f90c2f6f5 --- /dev/null +++ b/scripts/atis/voice.pl @@ -0,0 +1,102 @@ +#! /usr/bin/perl -w + +use strict; +use Symbol; + +sub usage { + print <> tmp.raw "; + print "$cmd\n"; + system $cmd; + my $end = $start{$arg} + $len{$arg}; + print "$start{$arg} + $len{$arg} = $end\n"; + } else { + print "Can't find '$arg'\n"; + } +} + + +sub setup{ + my $inch = Symbol::gensym(); + my $file = "$dir/voice.vce"; + open($inch, "<$file") || die "Cannot open input file '$file'\n"; + my $header = <$inch>; + chomp $header; + my $ii=1; + liner: while (my $line = <$inch>){ + chomp $line; + my @word = split(" ", $line); + my $nn = @word; + if ($nn != 3) { + next liner; + } + my $id = lc($word[0]); + my $st = $word[1]; + my $ln = $word[2]; + if ($ln =~ s/^x//) { + $ln = $ln - $st; + print "$id $st $ln\n"; + } + $start{$id} = $st; + $len{$id} = $ln; + ##print "$ii $nn '$line'\n"; + $ii++; + } + print "(($header)) --> $ii\n"; +} \ No newline at end of file diff --git a/scripts/atis/words_per_line.sh b/scripts/atis/words_per_line.sh new file mode 100755 index 000000000..ce18d895f --- /dev/null +++ b/scripts/atis/words_per_line.sh @@ -0,0 +1,43 @@ +#! /bin/bash + +## +## + +if test "x$1" = "x-h" ; then + 1>&2 echo "Usage: " + 1>&2 echo " $0 [filename]" + 1>&2 echo "Read words from input, treating all whitespace like," + 1>&2 echo "and write exactly N words per line on output." + 1>&2 echo "Options: " + 1>&2 echo " -n [N] specify N (default: 1)" + 1>&2 echo " filename = '-' or '' ==> read from standard input" + exit 1 +fi + +: ${wordmax:=1} +files="" + + +while test -n "$*" ; do + this=$1 ; shift + case $this in + -n) wordmax=$1 ; shift + ;; + *) files="$files $this" + ;; + esac +done + + +awk '{ + for (ii = 1; ii <=NF; ii++) { + printf ("%s", $ii); + words++; + if (words >= wordmax) { + print ""; + words = 0; + } else { + printf (" "); + } + } +}' wordmax=$wordmax $files -- 2.39.5