#!/usr/bin/perl -w
# $Id$

# l2poe is Copyright 2000 by Rocco Caputo.  All rights are reserved.
# l2poe is free software; you may redistribute it and/or modify it
# under the same terms as Perl itself.

# l2poe is an adaption of the ideas in Libero.  Libero lives at
# <http://www.imatix.com/>.

use strict;

use Getopt::Long;

sub TRACE_PARSE    () { 0 } # trace the libero parse phase
sub TRACE_GENERATE () { 0 } # trace the perl generate phase

#------------------------------------------------------------------------------
# Accept options from the command line, and figure out what to do.

my %option =
  ( help    => 0,
    version => 0,
    perl    => 1,
    dot     => 0,
  );

my @options =
  ( 'help|h|?',  # get help
    'perl!',     # turn perl output on/off
    'dot!',      # turn dot output on/off
  );

$option{help} = 1 unless GetOptions( \%option, @options );

my $input_file = shift @ARGV;
if (defined $input_file and length $input_file) {
  die "Unknown input file '$input_file'.\n" unless -f $input_file;
}
else {
  $option{help} = 1;
}

if ($option{help}) {
  print STDERR
    ( "Program: $0\n",
      '$Id$ ', "\n",
      "\n",
      "Copyright 2000 by Rocco Caputo <troc\@netrus.net>.  All rights\n",
      "are reserved.  This program is free software; you may redistribute\n",
      "and/or modify it under the same terms as Perl itself.\n",
      "\n",
      "This program accepts a Libero-like source file and generates Perl\n",
      "code and/or a DAG description file suitable for dot (graphviz).\n",
      "\n",
      "Usage: l2poe [options] input.l\n",
      "\n",
      "OPTIONS\n",
      "\n",
      "  -help or -h   Display this message.\n",
      "\n",
      "  -[no]perl     Generate a Perl file.  The generated Perl file will\n",
      "                be named after the input file, but it will end in\n",
      "                '.perl'.  If the input filename ends in '.l', that\n",
      "                will be removed first.  This is on by default.\n",
      "\n",
      "  -[no]dot      Generate a DAG (directed acyclic graph) file for\n",
      "                for graphviz's dot program.  The file will be named\n",
      "                after the input file, but it will end in '.dot'\n",
      "                instead of '.l'.  This option is off by default.\n",
      "\n"
    );
  exit 0;
}

#------------------------------------------------------------------------------
# Load the source file, and populate data structures.

my ( %subs, %machines, %defines, @template, %macros,
     $machine_name, $sub_name, $machine, $state, $event, $template_lines,
   );

sub MACRO_LINE     () { 0 }
sub MACRO_VALUE    () { 1 }

sub SUB_LINE       () { 0 }
sub SUB_BODY       () { 1 }

sub MACHINE_LINE   () { 0 }
sub MACHINE_BODY   () { 1 }
sub MACHINE_START  () { 2 }
sub MACHINE_NUMBER () { 3 }

sub STATE_ANCESTOR () { 0 }
sub STATE_LINE     () { 1 }
sub STATE_EVENTS   () { 2 }
sub STATE_START    () { 3 }
sub STATE_NUMBER   () { 4 }

sub EVENT_LINE     () { 0 }
sub EVENT_ACTIONS  () { 1 }
sub EVENT_STATE    () { 2 }
sub EVENT_EVENT    () { 3 }
sub EVENT_NUMBER   () { 4 }

sub ACTION_LINE    () { 0 }
sub ACTION_TYPE    () { 1 }
sub ACTION_BODY    () { 2 }
sub ACTION_STATE   () { 3 }
sub ACTION_EVENT   () { 4 }
sub ACTION_NUMBER  () { 5 }

sub AT_FUNCTION    () { 'function' }
sub AT_REGEXP      () { 'regexp'   }

sub SECT_UNKNOWN   () { 0x00 }
sub SECT_SUBS      () { 0x01 }
sub SECT_STATES    () { 0x02 }
sub SECT_TEMPLATE  () { 0x04 }
sub SECT_MACROS    () { 0x08 }

my $section = SECT_UNKNOWN;

open INPUT, "<$input_file" or die "Can't open input file '$input_file': $!\n";
while (<INPUT>) {
  chomp;

  ### Section heads.

  if (/^\#\s*\<\s*([^=]\S+)\s*(.*?)\s*\>\s*$/) {
    die "ending a section inside a sub definition at line $.\n"
      if defined $sub_name;

    undef $machine_name;
    undef $sub_name;
    undef $machine;
    undef $state;
    undef $event;
    $template_lines = 0;

    if ($1 eq 'subs') {
      $section = SECT_SUBS;
      next;
    }

    if ($1 eq 'macros') {
      $section = SECT_MACROS;
      next;
    }

    if ($1 eq 'states') {
      die "no machine name specified at line $." unless defined $2;

      $section = SECT_STATES;
      $machine_name = $2;
      $machine_name =~ tr[-][_];

      if (exists $machines{$machine_name}) {
        warn "machine \"$1\" defined twice:\n";
        warn("  the first time at line ",
             $machines{$machine_name}->[MACHINE_LINE], "\n"
            );
        die  "  the second time at line $.\n";
      }

      $machine = $machines{$machine_name} =
        [ $.,    # MACHINE_LINE
          { },   # MACHINE_BODY
          undef, # MACHINE_START
          undef, # MACHINE_NUMBER
        ];

      next;
    }

    if ($1 eq 'template') {
      $section = SECT_TEMPLATE;
      next;
    }

    # These aren't really sections.  They're insertions.  These should
    # go last; new sections ought to be added before here.

    if ($1 eq 'generate' or
        $1 eq 'spawn-inline' or $1 eq 'spawn-method' or $1 eq 'spawn-function'
       ) {
      $template_lines++;

      die "invoking \"$1\" directive outside of a template at line $.\n"
        unless $section & SECT_TEMPLATE;

      my $machine_name = $2;
      $machine_name =~ tr[-][_];
      die "invoking undefined state machine '$2' at line $.\n"
        unless exists $machines{$machine_name};
    }

    # Die on an unknown source directive.  Don't die on <generate>, etc,
    # because they're used later.

    else {
      die "unknown source directive \"$1\" at line $.\n";
    }
  }

  ### Template section.

  if ($section & SECT_TEMPLATE) {
    next if /^\s*$/ and not $template_lines;
    $template_lines++;
    push @template, "$_";
    next;
  }

  ### Some other section.

  next if /^\s*$/;
  next if /^\s*\#/;

  ### Macros section.

  if ($section & SECT_MACROS) {
    my ($macro_name, $macro_value);

    # Unadorned macros are expanded into runstate variables.
    if (/^\s*(\S+)\s*$/) {
      ($macro_name, $macro_value) = ($1, "\$_[RUNSTATE]->{$1}");
    }

    # Adorned ones are expanded into whatever adorns them.
    elsif (/^\s*(\S+)\s*\=\s*(.+?)\s*$/) {
      ($macro_name, $macro_value) = ($1, $2);
    }

    # Oops.
    else {
      die "syntax error: that's not a macro definition you have at line $.\n";
    }

    # Can't reassign macros, sorry.
    if (exists $macros{$macro_name}) {
      warn "macro \"$1\" defined twice:\n";
      warn("  the first time at line ",
           $macros{$macro_name}->[MACRO_LINE], "\n"
          );
      die  "  the second time at line $.\n";
    }

    $macros{$macro_name} =
      [ $.,            # MACRO_LINE
        $macro_value,  # MACRO_VALUE
      ];

    next;
  }

  ### Subs section.

  # Multi-line subs.

  if (/^sub\s+(\S+)\s+\{/) {
    die "sub definition outside 'subs' section at line $.\n"
      unless $section & SECT_SUBS;
    die "cannot nest sub definitions at line $.\n" if defined $sub_name;

    $sub_name = $1;
    $sub_name =~ tr[-][_];

    if (exists $subs{$sub_name}) {
      warn "sub \"$1\" defined twice:\n";
      warn("  the first time at line ",
           $subs{$sub_name}->[SUB_LINE], "\n"
          );
      die  "  the second time at line $.\n";
    }

    $subs{$sub_name} =
      [ $.,  # SUB_LINE
        [ ], # SUB_BODY
      ];

    next;
  }

  # Single-line subs.  These are quite common.

  if (/^sub\s+(\S+)\s+(.+?)\s*$/) {
    die "sub definition outside 'subs' section at line $.\n"
      unless $section & SECT_SUBS;
    die "cannot nest sub definitions at line $.\n" if defined $sub_name;

    $sub_name = $1;
    $sub_name =~ tr[-][_];

    if (exists $subs{$sub_name}) {
      warn "sub \"$1\" defined twice:\n";
      warn("  the first time at line ",
           $subs{$sub_name}->[SUB_LINE], "\n"
          );
      die  "  the second time at line $.\n";
    }

    $subs{$sub_name} =
      [ $.,         # SUB_LINE
        [ "  $2" ], # SUB_BODY
      ];

    undef $sub_name;

    next;
  }

  # Within a sub definition.

  if (defined $sub_name) {

    # End a sub.
    if (/^\}/) {
      undef $sub_name;
    }
    else {
      push @{$subs{$sub_name}->[SUB_BODY]}, $_;
    }

    next;
  }

  ### States section.

  # Begin a state.

  if (/^\s*(\S+)\s*\:\s*$/) {
    die "state definition outside 'states' section at line $.\n"
      unless $section & SECT_STATES;

    my ($state_name, $ancestor_name) = ($1, $2);
    $state_name =~ tr[-][_];
    $ancestor_name =~ tr[-][_] if defined $ancestor_name;

    if (exists $machine->[MACHINE_BODY]->{$state_name}) {
      warn "state \"$1\" defined twice:\n";
      warn("  the first time at line ",
           $machine->[MACHINE_BODY]->{$state_name}->[STATE_LINE], "\n"
          );
      die  "  the second time at line $.\n";
    }

    TRACE_PARSE and do {
      print STDERR "parsing state($1)";
      print STDERR " inherits($2)" if defined $state_name;
      print STDERR "\n";
    };

    undef $event;
    $state = $machine->[MACHINE_BODY]->{$state_name} =
      [ $ancestor_name,  # STATE_ANCESTOR
        $.,              # STATE_LINE
        { },             # STATE_EVENTS
        undef,           # STATE_START
        undef,           # STATE_NUMBER
      ];

    $machine->[MACHINE_START] = $state_name
      unless defined $machine->[MACHINE_START];

    next;
  }

  # State attributes.
  if (/^\s*\(\s*(\S+)\s*(.*?)\s*\)\s*$/) {
    die "defining a state attribute outside the 'states' section at line $.\n"
      unless $section & SECT_STATES;
    die "defining a state attribute before a state at line $.\n"
      unless defined $state;
    die "defining a state attribute after an event at line $.\n"
      if defined $event;

    TRACE_PARSE and do {
      print STDERR "    parsing attribute($1)";
      print STDERR " value($2)" if defined $2;
      print STDERR "\n";
    };

    my ($attribute, $value) = ($1, $2);

    # need to implement here

    next;
  }

  # Begin a regexp.

  if (/^\s*(\/.+?)(?:\s*\-\>\s*(.+?)(?:\s*\:\s*(.+?))?)?\s*$/) {
    die "defining a regexp action outside the 'states' section at line $.\n"
      unless $section & SECT_STATES;
    die "defining a regexp action before a state at line $.\n"
      unless defined $state;
    die "defining a regexp action before an event at line $.\n"
      unless defined $event;
    die "defining a regexp action without a transition at line $.\n"
      unless defined $2;

    my ($regexp_body, $regexp_next, $event_next) = ($1, $2, $3);
    $regexp_next =~ tr[-][_];

    if (defined $event_next) {
      $event_next =~ tr[-][_];
    }
    else {
      $event_next = 'enter';
    }

    TRACE_PARSE and
      print STDERR "    parsing regexp($1) goto($2)\n";

    push( @{$event->[EVENT_ACTIONS]},
          [ $.,           # ACTION_LINE
            AT_REGEXP,    # ACTION_TYPE
            $regexp_body, # ACTION_BODY
            $regexp_next, # ACTION_STATE
            $event_next,  # ACTION_EVENT
            undef,        # ACTION_NUMBER
          ]
        );

    next;
  }

  # Begin an action.

  if (/^\s*\&\s*(\S+)(?:\s*\-\>\s*(.+?)(?:\s*\:\s*(.+?))?)?\s*$/) {
    die "defining an action outside the 'states' section at line $.\n"
      unless $section & SECT_STATES;
    die "defining an action before a state at line $.\n"
      unless defined $state;
    die "defining an action before an event at line $.\n"
      unless defined $event;

    my ($action_body, $action_next, $event_next) = ($1, $2, $3);
    $action_body =~ tr[-][_];
    $action_next =~ tr[-][_] if defined $action_next;

    if (defined $event_next) {
      $event_next =~ tr[-][_];
    }
    else {
      $event_next = 'enter';
    }

    die "invoking sub \"$1\" before it's defined at line $.\n"
      unless exists $subs{$action_body};

    TRACE_PARSE and do {
      print STDERR "    parsing action($1)";
      print STDERR " goto($2)" if defined $action_next;
      print STDERR "\n";
    };

    push( @{$event->[EVENT_ACTIONS]},
          [ $.,           # ACTION_LINE
            AT_FUNCTION,  # ACTION_TYPE
            $action_body, # ACTION_BODY
            $action_next, # ACTION_STATE
            $event_next,  # ACTION_EVENT
            undef,        # ACTION_NUMBER
          ]
        );

    next;
  }

  # Begin an event.

  if (/^\s*(\S+)(?:\s*\-\>\s*(.+?)(?:\s*\:\s*(.+?))?)?\s*$/) {
    die "defining an event outside the 'states' section at line $.\n"
      unless $section & SECT_STATES;
    die "defining an event before a state at line $.\n"
      unless defined $state;

    my ($event_name, $event_next, $event_event) = ($1, $2, $3);
    $event_name =~ tr[-][_];
    $event_next =~ tr[-][_] if defined $event_next;

    if (defined $event_event) {
      $event_event =~ tr[-][_];
    }
    else {
      $event_event = 'enter';
    }

    if (exists $state->[STATE_EVENTS]->{$event_name}) {
      warn "event \"$1\" defined twice:\n";
      warn("  the first time at line ",
           $state->[STATE_EVENTS]->{$event_name}->[EVENT_LINE], "\n"
          );
      die "   the second time at line $.\n";
    }

    TRACE_PARSE and do {
      print STDERR "  parsing event($1)";
      print STDERR "\n";
    };

    $event = $state->[STATE_EVENTS]->{$event_name} =
      [ $.,           # EVENT_LINE
        [ ],          # EVENT_ACTIONS
        $event_next,  # EVENT_STATE
        $event_event, # EVENT_EVENT
        undef,        # EVENT_NUMBER
      ];

    $state->[STATE_START] = $event_name
      unless defined $state->[STATE_START];

    next;
  }

  warn "Syntax error in line $.:\n";
  die "$_\n";
}

close INPUT;

#------------------------------------------------------------------------------
# Late fixups.  Number the nodes, now that they're all here, and
# verify that edges go somewhere known.

my $machine_number = 0;
foreach my $machine_name (sort keys %machines) {
  my $machine = $machines{$machine_name};
  my $machine_node = $machine->[MACHINE_NUMBER] =
    'm' . $machine_number++;

  my $state_number = 0;
  foreach my $state_name (sort keys %{$machine->[MACHINE_BODY]}) {
    my $state = $machine->[MACHINE_BODY]->{$state_name};
    my $state_node = $state->[STATE_NUMBER] =
      $machine_node . 's' .$state_number++;

    my $event_number = 0;
    foreach my $event_name (sort keys %{$state->[STATE_EVENTS]}) {
      my $event = $state->[STATE_EVENTS]->{$event_name};
      my $event_node = $event->[EVENT_NUMBER] =
        $state_node . 'e' . $event_number++;

      my $action_number = 0;
      foreach my $action (@{$event->[EVENT_ACTIONS]}) {
        my $action_next = $action->[ACTION_STATE];
        $action->[ACTION_NUMBER] = $event_node . 'a' . $action_number++;

        if (defined $action_next) {
          die( "trying to move from machine \"$machine_name\", state ",
               "\"$state_name\", event \"$event_name\" to nonexistent ",
               "new state \"$action_next\"\n"
             )
            if not exists $machine->[MACHINE_BODY]->{$action_next};

          my $event_next  = $action->[ACTION_EVENT];
          die( "trying to move from machine \"$machine_name\", state ",
               "\"$state_name\", event \"$event_name\" to nonexistent ",
               "event \"$event_next\" in state \"$action_next\"\n"
             )
            if ( not exists
                 $machine->[MACHINE_BODY]->
                 {$action_next}->[STATE_EVENTS]->{$event_next}
               );
        }
      }
    }
  }
}

#------------------------------------------------------------------------------
# Generate Perl output.  Because the "regenerate a state machine" is
# back-ended to a sub somewhere, it's possible to change that out and
# generate some other language.  In fact, that sub should be all that
# needs to change for this to generate other languages.

if ($option{perl}) {

  # More late fixups.  This time, we prepare the macro regexp for
  # search-and-replace.
  my $macro_regexp;
  $macro_regexp = text_trie_as_regexp(text_trie_trie(keys %macros))
    if keys %macros;

  my $perl_file = $input_file;
  $perl_file =~ s/(\.l)?$/.perl/;

  open(PERL, ">$perl_file") or die "Can't write '$perl_file': $!\n";
  my $old_select = select(PERL);

  foreach my $line (@template) {

    # Generate a state machine.

    if ($line =~ /^\#\s*\<\s*(\S+)\s*(.+?)\s*\>\s*$/) {
      my $machine_name = $2;
      $machine_name =~ tr[-][_];

      my @insert;
      if ($1 eq 'generate') {
        @insert = &generate_machine($machine_name);
      }
      elsif ($1 eq 'spawn-inline') {
        @insert = &generate_inline_spawn($machine_name);
      }
      elsif ($1 eq 'spawn-method') {
        @insert = &generate_method_spawn($machine_name);
      }
      elsif ($1 eq 'spawn-function') {
        @insert = &generate_function_spawn($machine_name);
      }
      else {
        die "inconsistent generate directive \"$1\" shouldn't be";
      }

      foreach my $insert (@insert) {
        # Substitute macros.
        $insert =~ s/\b($macro_regexp)\b/$macros{$1}->[MACRO_VALUE]/sg
          if defined $macro_regexp;
        print "$insert\n";
      }

      next;
    }

    # Substitute macros.
    $line =~ s/\b($macro_regexp)\b/$macros{$1}->[MACRO_VALUE]/sg
      if defined $macro_regexp;

    print "$line\n";
  }

  select($old_select);
  close PERL;
}

#------------------------------------------------------------------------------
# Generate DAG output (dot file) for graphviz.

if ($option{dot}) {
  my $dot_file = $input_file;
  $dot_file =~ s/(\.l)?$/.dot/;

  open(DOT, ">$dot_file") or die "Can't write '$dot_file': $!\n";
  my $old_select = select(DOT);

  my $digraph_name = 'digraph_' . $input_file;
  $digraph_name =~ s/\.l$//;
  $digraph_name =~ tr[-][_];

  print( "// DAG file for $input_file\n",
         "// Generated by $0 at ", scalar(gmtime()), " GMT\n",
         "\n",
         "digraph $digraph_name {\n",
         "  center=1;\n",
         "  mclimit=128;\n",
         "  nslimit=128;\n",
         "\n",
       );

  # dot source for interstate edges is generated last, so that nodes
  # aren't vivified before they're defined.  I'm not sure if this is
  # important, but it feels like the proper thing to do.

  sub EDGE_FROM       () { 0 }
  sub EDGE_TO         () { 1 }
  sub EDGE_LABEL      () { 2 }
  sub EDGE_HEAD_LABEL () { 3 }
  sub EDGE_TAIL_LABEL () { 4 }
  sub EDGE_WEIGHT     () { 5 }
  my @interstate_edges;

  # Every machine is a subgraph.

  foreach my $machine_name (sort keys %machines) {
    my $machine = $machines{$machine_name};
    my $machine_number = $machine->[MACHINE_NUMBER];

    print( "  subgraph cluster_$machine_number {\n",
           "    label=\"Machine: $machine_name\";\n",
           "\n",
         );

    # Every state is a subgraph.

    foreach my $state_name (sort keys %{$machine->[MACHINE_BODY]}) {
      my $state = $machine->[MACHINE_BODY]->{$state_name};
      my $state_number = $state->[STATE_NUMBER];

      print( "    subgraph cluster_$state_number {\n",
             "      label=\"State: $state_name\";\n",
             "\n",
           );

      # Track edges that leave this state.  If two or more edges leave
      # one state for the same destination, they're manually
      # concentrated within the state.  This makes much cleaner
      # graphs.

      sub OUT_SOURCE     () { 0 }
      sub OUT_TAIL_LABEL () { 1 }
      sub OUT_WEIGHT     () { 2 }
      my %outbound;

      # Each event begins a chain of zero or more actions.

      foreach my $event_name (sort keys %{$state->[STATE_EVENTS]}) {
        my $event = $state->[STATE_EVENTS]->{$event_name};
        my $last_action_node  = $event->[EVENT_NUMBER];
        my $last_action_label;
        my $branch_weight = 1;

        # Events are ellipses.
        print "      $last_action_node [label=\"$event_name\"];\n";

        # Actions under an event are treated as a chain.  They're sort
        # of like little flowcharts: the ellipse for the event is like
        # "start" on the flowchart, and the actions are either boxes
        # (not branching) or diamonds (branches).

        my $action_number = 0;
        while ($action_number < @{$event->[EVENT_ACTIONS]}) {
          my $action = $event->[EVENT_ACTIONS]->[$action_number++];

          my $action_type = $action->[ACTION_TYPE];
          my $action_body = $action->[ACTION_BODY];
          my $action_next = $action->[ACTION_STATE];
          my $event_next  = $action->[ACTION_EVENT];

          # Double up the backwhacks.
          $action_body =~ s/\\/\\\\\\\\/g;

          # The action is a condition, so it's in a diamond, and it's
          # all by its little lonesome.

          if (defined $action_next) {
            print( "      $action->[ACTION_NUMBER] ",
                   "[label=\"$action_body\",shape=diamond];\n"
                 );
          }

          # Otherwise it's an unconditional action, and it belongs
          # inside a rectangle.

          else {

            # Chains of unconditional actions are glommed together
            # into the same rectangle.  This optimizes graphs
            # somewhat.

            while ($action_number < @{$event->[EVENT_ACTIONS]}) {
              my $forward_action = $event->[EVENT_ACTIONS]->[$action_number];
              last if defined $forward_action->[ACTION_STATE];

              # Double up the backwhacks.
              my $more_body = $forward_action->[ACTION_BODY];
              $more_body =~ s/\\/\\\\\\/g;

              $action_body .= ";\\n" . $more_body;
              $action_number++;
            }

            # Emit the rectangle for one or more unconditional
            # actions.

            print( "      $action->[ACTION_NUMBER] ",
                   "[label=\"$action_body;\",shape=box];\n"
                 );
          }

          # This is the edge that enters the current action from the
          # previous action (or event).

          print( "      $last_action_node -> $action->[ACTION_NUMBER] ",
                 "[weight=127,minlen=1",
               );
          if (defined $last_action_label) {
            print ",taillabel=\"$last_action_label\"";
            undef $last_action_label;
          }
          print "];\n";

          # Record an outbound edge for a branching event.  Outbound
          # edges are emitted at the end of the state definition.

          if (defined $action_next) {
            my $destination = ( $machine->[MACHINE_BODY]->{$action_next}->
                                [STATE_EVENTS]->{$event_next}->[EVENT_NUMBER]
                              );

            my $out_rec =
              [ $action->[ACTION_NUMBER], # OUT_SOURCE
                'yes',                    # OUT_TAIL_LABEL
                $branch_weight,           # OUT_WEIGHT
              ];

            if (exists $outbound{$destination}) {
              push @{$outbound{$destination}}, $out_rec;
            }
            else {
              $outbound{$destination} = [ $out_rec ];
            }

            $last_action_label = 'no';
          }

          $last_action_node = $action->[ACTION_NUMBER];

          # Increase the branch weight for each level down.  This
          # makes nicer looking graphs for some reason.

          $branch_weight++;
          $branch_weight = 100 if $branch_weight > 100;
        }

        # Each event can have a default branch.  This branch is taken
        # if none of the actions branch first.

        my $next_state = $event->[EVENT_STATE];
        if (defined $next_state) {
          my $next_event = $event->[EVENT_EVENT];
          my $destination = ( $machine->[MACHINE_BODY]->{$next_state}->
                              [STATE_EVENTS]->{$next_event}->[EVENT_NUMBER]
                            );

          my $out_rec =
            [ $last_action_node,   # OUT_SOURCE
              $last_action_label,  # OUT_TAIL_LABEL
              $branch_weight,      # OUT_WEIGHT
            ];

          if (exists $outbound{$destination}) {
            push @{$outbound{$destination}}, $out_rec;
          }
          else {
            $outbound{$destination} = [ $out_rec ];
          }

          undef $last_action_label;
        }
      }

      # The state's nodes and internal edges have been emitted.
      # Optimize outbound edges now.  If there are two or more
      # transitions to the same destination, have them all route to a
      # "concentrator" node within the cluster.
  
      foreach my $destination (keys %outbound) {
        my ($source, $tail_label);
        my $weight = 0;

        # Concentrate if more than one transition.
        if (@{$outbound{$destination}} > 1) {

          # Make a tiny circle to concentrate outbound edges.
          my $concentrator_id = $state_number . '_' . $destination;
          print( "      $concentrator_id [label=\" \",shape=house,",
                 "height=0.5,width=0.33,fixedsize=true,orientation=180];\n"
               );

          # Make all the sources point to the circle.
          foreach my $dest_rec (@{$outbound{$destination}}) {
            print "      $dest_rec->[OUT_SOURCE] -> $concentrator_id [";
            print "label=\"$dest_rec->[OUT_TAIL_LABEL]\","
              if defined $dest_rec->[OUT_TAIL_LABEL];
            print "minlen=1,weight=$dest_rec->[OUT_WEIGHT]";
            print "];\n";

            $weight = $dest_rec->[OUT_WEIGHT]
              if $dest_rec->[OUT_WEIGHT] > $weight;
          }

          # Point the concentrator to its destination.
          $source = $concentrator_id;
        }

        # Nothing to concentrate.
        else {
          $source     = $outbound{$destination}->[0]->[OUT_SOURCE];
          $tail_label = $outbound{$destination}->[0]->[OUT_TAIL_LABEL];
          $weight     = $outbound{$destination}->[0]->[OUT_WEIGHT];
        }

        # Point from the concentrator (or whatever) to the next state.
        push( @interstate_edges,
              [ $source,       # EDGE_FROM
                $destination,  # EDGE_TO
                undef,         # EDGE_LABEL
                undef,         # EDGE_HEAD_LABEL
                $tail_label,   # EDGE_TAIL_LABEL
                $weight,       # EDGE_WEIGHT
              ]
            );
      }

      # Close the state cluster.
      print( "\n",
             "    } // cluster_$state_number\n",
             "\n",
           );

    }

    # All states are done now.  The interstate edges remaining go from
    # concentrators or lone things to entry events.  The gist is that
    # there is only one interstate edge between any given pair of
    # states.  Anyway, add these edges, and make them a little darker
    # for clarity.

    foreach my $edge (@interstate_edges) {
      my $edge_label      = $edge->[EDGE_LABEL];
      my $edge_head_label = $edge->[EDGE_HEAD_LABEL];
      my $edge_tail_label = $edge->[EDGE_TAIL_LABEL];

      print( "    $edge->[EDGE_FROM] -> $edge->[EDGE_TO] [",
             "style=bold,minlen=1",
             ",weight=$edge->[EDGE_WEIGHT]",
           );

      print ",label=\"$edge_label\"" if defined $edge_label;
      print ",headlabel=\"$edge_head_label\"" if defined $edge_head_label;
      print ",taillabel=\"$edge_tail_label\"" if defined $edge_tail_label;

      print "];\n"
    }

    print( "\n",
           "  } // cluster_$machine_number\n",
         );
  }

  print( "\n",
         "} // $digraph_name\n",
       );

  select($old_select);
  close DOT;
}

#------------------------------------------------------------------------------
# Helper to generate a state machine from its definition.

sub generate_machine {
  my $machine_name = shift;
  my @machine;

  push @machine, "my \%$machine_name =";
  my $states = 0;

  my $machine = $machines{$machine_name};

  my @state_names = sort keys %{$machine->[MACHINE_BODY]};
  foreach my $state_name (@state_names) {
    TRACE_GENERATE and
      print STDERR "generating state \"$state_name\"\n";

    push @machine, '  ' . ($states++ ? '  ' : '( ') . $state_name . ' =>';

    my $events = 0;

    my $state = $machine->[MACHINE_BODY]->{$state_name};
    my @event_names = sort keys %{$state->[STATE_EVENTS]};
    foreach my $event_name (@event_names) {
      TRACE_GENERATE and
        print STDERR "  generating event \"$event_name\"\n";

      push( @machine,
            '    ' . ($events++ ? '  ' : '{ ') . $event_name . ' => sub {'
          );

      my $event = $state->[STATE_EVENTS]->{$event_name};
      foreach my $action (@{$event->[EVENT_ACTIONS]}) {
        my $action_type = $action->[ACTION_TYPE];
        my $action_body = $action->[ACTION_BODY];
        my $action_next = $action->[ACTION_STATE];
        my $event_next  = $action->[ACTION_EVENT];

        TRACE_GENERATE and
          print STDERR "    generating $action_type action \"$action_body\"\n";

        if ($action_type eq AT_FUNCTION) {
          my $lines = @{$subs{$action_body}->[SUB_BODY]};

          # Body has no lines.  If there *is* a next state after this,
          # it's ignored because the empty sub always returns false.

          if ($lines == 0) {
            push @machine, "        # sub: $action_body (is empty)";
            next;
          }

          push @machine, "        # sub: $action_body";

          # One or more lines.  If there's a next action, then we have
          # to wrap it in "do BLOCK".

          if (defined $action_next) {
            push @machine, '        do {';
            foreach my $body_line (@{$subs{$action_body}->[SUB_BODY]}) {
              push @machine, '        ' . $body_line;
            }
            push( @machine,
                  "        } and return \$_[MACHINE]->goto_state( " .
                   "'$action_next', '$event_next' );"
                );
            next;
          }

          # No next action means unadorned lines.
          foreach my $body_line (@{$subs{$action_body}->[SUB_BODY]}) {
            push @machine, '      ' . $body_line;
          }
        }
        elsif ($action_type eq AT_REGEXP) {
          push( @machine,
                '        # recognizer',
                "        \$_[ARG0] =~ $action_body",
                "          and return \$_[MACHINE]->goto_state( " .
                "'$action_next', '$event_next' );"
              );
        }
        else {
          die "unknown action type \"$action_type\" ... $0 is broken";
        }

      } # action

      if (defined $event->[EVENT_STATE]) {
        push( @machine,
              '        # default transition',
              "        \$_[MACHINE]->goto_state( '$event->[EVENT_STATE]', " .
              "'$event->[EVENT_EVENT]' );"
            );
      }

      push @machine, "      },";
      
    } # event
    
    push @machine, "    },";
    
  } # state

  push( @machine, '  );' );

  return @machine;
}

#------------------------------------------------------------------------------
# Helpers to generate a spawn function.  The "function" spawn is a
# function named "spawn_machine_name".  The "method" spawn is just
# named "spawn" and assumes it's the only one of its kind in the
# package.  The "inline" spawn is inline code.

sub generate_function_spawn {
  my $machine_name = shift;
  my $machine = $machines{$machine_name};

  ( "sub spawn_$machine_name {",
    '  POE::NFA->spawn( ' .
    "inline_states => \\\%$machine_name )->goto_state( " .
    $machine->[MACHINE_START] . " => " .
    $machine->[MACHINE_BODY]->{$machine->[MACHINE_START]}->[STATE_START] .
    " => \@_ );",
    "}",
  );
}

sub generate_method_spawn {
  my $machine_name = shift;
  my $machine = $machines{$machine_name};

  ( "sub spawn {",
    '  POE::NFA->spawn( ' .
    "inline_states => \\\%$machine_name )->goto_state( " .
    $machine->[MACHINE_START] . " => " .
    $machine->[MACHINE_BODY]->{$machine->[MACHINE_START]}->[STATE_START] .
    " => \@_ );",
    "}",
  );
}

sub generate_inline_spawn {
  my $machine_name = shift;
  my $machine = $machines{$machine_name};

  ( 'POE::NFA->spawn( ' .
    "inline_states => \\\%$machine_name )->goto_state( " .
    $machine->[MACHINE_START] . " => " .
    $machine->[MACHINE_BODY]->{$machine->[MACHINE_START]}->[STATE_START] .
    " => \@_ );",
  );
}

#------------------------------------------------------------------------------
# Helpers to generate a sexy regexp from a bunch of words.  These come
# from POE::Preprocessor, which in turn gets them from Text::Trie.

# text_trie_trie is virtually identical to code in Ilya Zakharevich's
# Text::Trie::Trie function.  The minor differences involve hardcoding
# the minimum substring length to 1 and sorting the output.

sub text_trie_trie {
  my @list = @_;
  return shift if @_ == 1;
  my (@trie, %first);

  foreach (@list) {
    my $c = substr $_, 0, 1;
    if (exists $first{$c}) {
      push @{$first{$c}}, $_;
    }
    else {
      $first{$c} = [ $_ ];
    }
  }

  foreach (sort keys %first) {
    # Find common substring
    my $substr = $first{$_}->[0];
    (push @trie, $substr), next if @{$first{$_}} == 1;
    my $l = length($substr);
    foreach (@{$first{$_}}) {
      $l-- while substr($_, 0, $l) ne substr($substr, 0, $l);
    }
    $substr = substr $substr, 0, $l;

    # Feed the trie.
    @list = map {substr $_, $l} @{$first{$_}};
    push @trie, [$substr, text_trie_trie(@list)];
  }

  @trie;
}

# This is basically Text::Trie::walkTrie, but it's hardcoded to generate
# regular expressions.

sub text_trie_as_regexp {
  my @trie   = @_;
  my $num    = 0;
  my $regexp = '';

  foreach (@trie) {
    $regexp .= '|' if $num++;
    if (ref $_ eq 'ARRAY') {
      $regexp .= $_->[0] . '(?:';
      if ($#$_ > 1) {
        $regexp .= text_trie_as_regexp( @{$_}[1 .. $#$_] );
      }
      $regexp .= ')';
    }
    else {
      $regexp .= $_;
    }
  }

  $regexp;
}

__END__

<yrlnry> But I have ceased to be amazed at anything that POE does any
         more, so when I saw that it was using a source filter, I just
         assumed it was Business As Usual For POE, and moved on to try
         to figure out what was unusual about it.

<mrhand> it's something of a redpill at first. It takes some
         adjustment, becuase it seems magical
<mrhand> in fact, Wheel is downright disconcerting at first
<mrhand> but you get used to it
<mrhand> and you don't need Wheel to start out with