#line 1
# vim: set ts=2 sts=2 sw=2 expandtab smarttab:
#
# This file is part of Pod-Markdown
#
# This software is copyright (c) 2011 by Randy Stauner.
#
# This is free software; you can redistribute it and/or modify it under
# the same terms as the Perl 5 programming language system itself.
#
use 5.008;
use strict;
use warnings;

package Pod::Markdown;
# git description: v3.100-2-g1399a61

our $AUTHORITY = 'cpan:RWSTAUNER';
# ABSTRACT: Convert POD to Markdown
$Pod::Markdown::VERSION = '3.101';
use Pod::Simple 3.27 (); # detected_encoding and keep_encoding bug fix
use parent qw(Pod::Simple::Methody);
use Encode ();

our %URL_PREFIXES = (
  sco      => 'http://search.cpan.org/perldoc?',
  metacpan => 'https://metacpan.org/pod/',
  man      => 'http://man.he.net/man',
);
$URL_PREFIXES{perldoc} = $URL_PREFIXES{metacpan};

our $LOCAL_MODULE_RE = qr/^(Local::|\w*?_\w*)/;

## no critic
#{
  our $HAS_HTML_ENTITIES;

  # Stolen from Pod::Simple::XHTML 3.28. {{{

  BEGIN {
    $HAS_HTML_ENTITIES = eval "require HTML::Entities; 1";
  }

  my %entities = (
    q{>} => 'gt',
    q{<} => 'lt',
    q{'} => '#39',
    q{"} => 'quot',
    q{&} => 'amp',
  );

  sub encode_entities {
    my $self = shift;
    my $ents = $self->html_encode_chars;
    return HTML::Entities::encode_entities( $_[0], $ents ) if $HAS_HTML_ENTITIES;
    if (defined $ents) {
        $ents =~ s,(?<!\\)([]/]),\\$1,g;
        $ents =~ s,(?<!\\)\\\z,\\\\,;
    } else {
        $ents = join '', keys %entities;
    }
    my $str = $_[0];
    $str =~ s/([$ents])/'&' . ($entities{$1} || sprintf '#x%X', ord $1) . ';'/ge;
    return $str;
  }

  # }}}

  # Add a few very common ones for consistency and readability
  # (in case HTML::Entities isn't available).
  %entities = (
    # Pod::Markdown has always required 5.8 so unicode_to_native will be available.
    chr(utf8::unicode_to_native(0xA0)) => 'nbsp',
    chr(utf8::unicode_to_native(0xA9)) => 'copy',
    %entities
  );

  sub __entity_encode_ord_he {
    my $chr = chr $_[0];
    # Skip the encode_entities() logic and go straight for the substitution
    # since we already have the char we know we want replaced.
    # Both the hash and the function are documented as exportable (so should be reliable).
    return $HTML::Entities::char2entity{ $chr } || HTML::Entities::num_entity( $chr );
  }
  sub __entity_encode_ord_basic {
    return '&' . ($entities{chr $_[0]} || sprintf '#x%X', $_[0]) . ';';
  }

  # From HTML::Entities 3.69
  my $DEFAULT_ENTITY_CHARS = '^\n\r\t !\#\$%\(-;=?-~';

#}
## use critic

# Use hash for simple "exists" check in `new` (much more accurate than `->can`).
my %attributes = map { ($_ => 1) }
  qw(
    html_encode_chars
    match_encoding
    output_encoding
    local_module_re
    local_module_url_prefix
    man_url_prefix
    perldoc_url_prefix
    perldoc_fragment_format
    markdown_fragment_format
    include_meta_tags
  );


sub new {
  my $class = shift;
  my %args = @_;

  my $self = $class->SUPER::new();
  $self->preserve_whitespace(1);
  $self->nbsp_for_S(1);
  $self->accept_targets(qw( markdown html ));

  # Default to the global, but allow it to be overwritten in args.
  $self->local_module_re($LOCAL_MODULE_RE);

  for my $type ( qw( perldoc man ) ){
    my $attr  = $type . '_url_prefix';
    # Initialize to the alias.
    $self->$attr($type);
  }

  while( my ($attr, $val) = each %args ){
    # NOTE: Checking exists on a private var means we don't allow Pod::Simple
    # attributes to be set this way.  It's not very consistent, but I think
    # I'm ok with that for now since there probably aren't many Pod::Simple attributes
    # being changed besides `output_*` which feel like API rather than attributes.
    # We'll see.
    # This is currently backward-compatible as we previously just put the attribute
    # into the private stash so anything unknown was silently ignored.
    # We could open this up to `$self->can($attr)` in the future if that seems better
    # but it tricked me when I was testing a misspelled attribute name
    # which also happened to be a Pod::Simple method.

    exists $attributes{ $attr } or
      # Provide a more descriptive message than "Can't locate object method".
      warn("Unknown argument to ${class}->new(): '$attr'"), next;

    # Call setter.
    $self->$attr($val);
  }

  # TODO: call from the setters.
  $self->_prepare_fragment_formats;

  return $self;
}

for my $type ( qw( local_module perldoc man ) ){
  my $attr  = $type . '_url_prefix';
  no strict 'refs'; ## no critic
  *$attr = sub {
    my $self = shift;
    if (@_) {
      $self->{$attr} = $URL_PREFIXES{ $_[0] } || $_[0];
    }
    else {
      return $self->{$attr};
    }
  }
}

## Attribute accessors ##


sub html_encode_chars {
  my $self  = shift;
  my $stash = $self->_private;

  # Setter.
  if( @_ ){
    # If false ('', 0, undef), disable.
    if( !$_[0] ){
      delete $stash->{html_encode_chars};
      $stash->{encode_amp}  = 1;
      $stash->{encode_lt}   = 1;
    }
    else {
      # Special case boolean '1' to mean "all".
      # If we have HTML::Entities, undef will use the default.
      # Without it, we need to specify so that we use the same list (for consistency).
      $stash->{html_encode_chars} = $_[0] eq '1' ? ($HAS_HTML_ENTITIES ? undef : $DEFAULT_ENTITY_CHARS) : $_[0];

      # If [char] doesn't get encoded, we need to do it ourselves.
      $stash->{encode_amp}  = ($self->encode_entities('&') eq '&');
      $stash->{encode_lt}   = ($self->encode_entities('<') eq '<');
    }
    return;
  }

  # Getter.
  return $stash->{html_encode_chars};
}


# I prefer ro-accessors (immutability!) but it can be confusing
# to not support the same API as other Pod::Simple classes.

# NOTE: Pod::Simple::_accessorize is not a documented public API.
# Skip any that have already been defined.
__PACKAGE__->_accessorize(grep { !__PACKAGE__->can($_) } keys %attributes);

sub _prepare_fragment_formats {
  my ($self) = @_;

  foreach my $attr ( keys %attributes ){
    next unless $attr =~ /^(\w+)_fragment_format/;
    my $type = $1;
    my $format = $self->$attr;

    # If one was provided.
    if( $format ){
      # If the attribute is a coderef just use it.
      next if ref($format) eq 'CODE';
    }
    # Else determine a default.
    else {
      if( $type eq 'perldoc' ){
        # Choose a default that matches the destination url.
        my $target = $self->perldoc_url_prefix;
        foreach my $alias ( qw( metacpan sco ) ){
          if( $target eq $URL_PREFIXES{ $alias } ){
            $format = $alias;
          }
        }
        # This seems like a reasonable fallback.
        $format ||= 'pod_simple_xhtml';
      }
      else {
        $format = $type;
      }
    }

    # The short name should become a method name with the prefix prepended.
    my $prefix = 'format_fragment_';
    $format =~ s/^$prefix//;
    die "Unknown fragment format '$format'"
      unless $self->can($prefix . $format);

    # Save it.
    $self->$attr($format);
  }

  return;
}

## Backward compatible API ##

# For backward compatibility (previously based on Pod::Parser):
# While Pod::Simple provides a parse_from_file() method
# it's primarily for Pod::Parser compatibility.
# When called without an output handle it will print to STDOUT
# but the old Pod::Markdown never printed to a handle
# so we don't want to start now.
sub parse_from_file {
  my ($self, $file) = @_;

  # TODO: Check that all dependent cpan modules use the Pod::Simple API
  # then add a deprecation warning here to avoid confusion.

  $self->output_string(\($self->{_as_markdown_}));
  $self->parse_file($file);
}

# Likewise, though Pod::Simple doesn't define this method at all.
sub parse_from_filehandle { shift->parse_from_file(@_) }


## Document state ##

sub _private {
  my ($self) = @_;
  $self->{_Pod_Markdown_} ||= {
    indent      => 0,
    stacks      => [],
    states      => [{}],
    link        => [],
    encode_amp  => 1,
    encode_lt   => 1,
  };
}

sub _increase_indent {
  ++$_[0]->_private->{indent} >= 1
    or die 'Invalid state: indent < 0';
}
sub _decrease_indent {
  --$_[0]->_private->{indent} >= 0
    or die 'Invalid state: indent < 0';
}

sub _new_stack {
  push @{ $_[0]->_private->{stacks} }, [];
  push @{ $_[0]->_private->{states} }, {};
}

sub _last_string {
  $_[0]->_private->{stacks}->[-1][-1];
}

sub _pop_stack_text {
  $_[0]->_private->{last_state} = pop @{ $_[0]->_private->{states} };
  join '', @{ pop @{ $_[0]->_private->{stacks} } };
}

sub _stack_state {
  $_[0]->_private->{states}->[-1];
}

sub _save {
  my ($self, $text) = @_;
  push @{ $self->_private->{stacks}->[-1] }, $text;
  # return $text; # DEBUG
}

sub _save_line {
  my ($self, $text) = @_;

  $text = $self->_process_escapes($text);

  $self->_save($text . $/);
}

# For paragraphs, etc.
sub _save_block {
  my ($self, $text) = @_;

  $self->_stack_state->{blocks}++;

  $self->_save_line($self->_indent($text) . $/);
}

## Formatting ##

sub _chomp_all {
  my ($self, $text) = @_;
  1 while chomp $text;
  return $text;
}

sub _indent {
  my ($self, $text) = @_;
  my $level = $self->_private->{indent};

  if( $level ){
    my $indent = ' ' x ($level * 4);

    # Capture text on the line so that we don't indent blank lines (/^\x20{4}$/).
    $text =~ s/^(.+)/$indent$1/mg;
  }

  return $text;
}

# as_markdown() exists solely for backward compatibility
# and requires having called parse_from_file() to be useful.


sub as_markdown {
    my ($parser, %args) = @_;
    my @header;
    # Don't add meta tags again if we've already done it.
    if( $args{with_meta} && !$parser->include_meta_tags ){
        @header = $parser->_build_markdown_head;
    }
    return join("\n" x 2, @header, $parser->{_as_markdown_});
}

sub _build_markdown_head {
    my $parser    = shift;
    my $data      = $parser->_private;
    return join "\n",
        map  { qq![[meta \l$_="$data->{$_}"]]! }
        grep { defined $data->{$_} }
        qw( Title Author );
}

## Escaping ##

# http://daringfireball.net/projects/markdown/syntax#backslash
# Markdown provides backslash escapes for the following characters:
#
# \   backslash
# `   backtick
# *   asterisk
# _   underscore
# {}  curly braces
# []  square brackets
# ()  parentheses
# #   hash mark
# +   plus sign
# -   minus sign (hyphen)
# .   dot
# !   exclamation mark

# However some of those only need to be escaped in certain places:
# * Backslashes *do* need to be escaped or they may be swallowed by markdown.
# * Word-surrounding characters (/[`*_]/) *do* need to be escaped mid-word
# because the markdown spec explicitly allows mid-word em*pha*sis.
# * I don't actually see anything that curly braces are used for.
# * Escaping square brackets is enough to avoid accidentally
# creating links and images (so we don't need to escape plain parentheses
# or exclamation points as that would generate a lot of unnecesary noise).
# Parentheses will be escaped in urls (&end_L) to avoid premature termination.
# * We don't need a backslash for every hash mark or every hyphen found mid-word,
# just the ones that start a line (likewise for plus and dot).
# (Those will all be handled by _escape_paragraph_markdown).


# Backslash escape markdown characters to avoid having them interpreted.
sub _escape_inline_markdown {
  local $_ = $_[1];

# s/([\\`*_{}\[\]()#+-.!])/\\$1/g; # See comments above.
  s/([\\`*_\[\]])/\\$1/g;

  return $_;
}

# Escape markdown characters that would be interpreted
# at the start of a line.
sub _escape_paragraph_markdown {
    local $_ = $_[1];

    # Escape headings, horizontal rules, (unordered) lists, and blockquotes.
    s/^([-+#>])/\\$1/mg;

    # Markdown doesn't support backslash escapes for equal signs
    # even though they can be used to underline a header.
    # So use html to escape them to avoid having them interpreted.
    s/^([=])/sprintf '&#x%x;', ord($1)/mge;

    # Escape the dots that would wrongfully create numbered lists.
    s/^( (?:>\s+)? \d+ ) (\.\x20)/$1\\$2/xgm;

    return $_;
}


# Additionally Markdown allows inline html so we need to escape things that look like it.
# While _some_ Markdown processors handle backslash-escaped html,
# [Daring Fireball](http://daringfireball.net/projects/markdown/syntax) states distinctly:
# > In HTML, there are two characters that demand special treatment: < and &...
# > If you want to use them as literal characters, you must escape them as entities, e.g. &lt;, and &amp;.

# It goes on to say:
# > Markdown allows you to use these characters naturally,
# > taking care of all the necessary escaping for you.
# > If you use an ampersand as part of an HTML entity,
# > it remains unchanged; otherwise it will be translated into &amp;.
# > Similarly, because Markdown supports inline HTML,
# > if you use angle brackets as delimiters for HTML tags, Markdown will treat them as such.

# In order to only encode the occurrences that require it (something that
# could be interpreted as an entity) we escape them all so that we can do the
# suffix test later after the string is complete (since we don't know what
# strings might come after this one).

my %_escape =
  map {
    my ($k, $v) = split /:/;
    # Put the "code" marker before the char instead of after so that it doesn't
    # get confused as the $2 (which is what requires us to entity-encode it).
    # ( "XsX", "XcsX", "X(c?)sX" )
    my ($s, $code, $re) = map { "\0$_$v\0" } '', map { ($_, '('.$_.'?)') } 'c';

    (
      $k         => $s,
      $k.'_code' => $code,
      $k.'_re'   => qr/$re/,
    )
  }
    qw( amp:& lt:< );

# Make the values of this private var available to the tests.
sub __escape_sequences { %_escape }


# HTML-entity encode any characters configured by the user.
# If that doesn't include [&<] then we escape those chars so we can decide
# later if we will entity-encode them or put them back verbatim.
sub _encode_or_escape_entities {
  my $self  = $_[0];
  my $stash = $self->_private;
  local $_  = $_[1];

  if( $stash->{encode_amp} ){
    if( exists($stash->{html_encode_chars}) ){
      # Escape all amps for later processing.
      # Pass intermediate strings to entity encoder so that it doesn't
      # process any of the characters of our escape sequences.
      # Use -1 to get "as many fields as possible" so that we keep leading and
      # trailing (possibly empty) fields.
      $_ = join $_escape{amp}, map { $self->encode_entities($_) } split /&/, $_, -1;
    }
    else {
      s/&/$_escape{amp}/g;
    }
  }
  elsif( exists($stash->{html_encode_chars}) ){
    $_ = $self->encode_entities($_);
  }

  s/</$_escape{lt}/g
    if $stash->{encode_lt};

  return $_;
}

# From Markdown.pl version 1.0.1 line 1172 (_DoAutoLinks).
my $EMAIL_MARKER = qr{
#   <                  # Opening token is in parent regexp.
        (?:mailto:)?
    (
      [-.\w]+
      \@
      [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
    )
    >
}x;

# Process any escapes we put in the text earlier,
# now that the text is complete (end of a block).
sub _process_escapes {
  my $self  = $_[0];
  my $stash = $self->_private;
  local $_  = $_[1];

  # The patterns below are taken from Markdown.pl 1.0.1 _EncodeAmpsAndAngles().
  # In this case we only want to encode the ones that Markdown won't.
  # This is overkill but produces nicer looking text (less escaped entities).
  # If it proves insufficent then we'll just encode them all.

  # $1: If the escape was in a code sequence, simply replace the original.
  # $2: If the unescaped value would be followed by characters
  #     that could be interpreted as html, entity-encode it.
  # else: The character is safe to leave bare.

  # Neither currently allows $2 to contain '0' so bool tests are sufficient.

  if( $stash->{encode_amp} ){
    # Encode & if succeeded by chars that look like an html entity.
    s,$_escape{amp_re}((?:#?[xX]?(?:[0-9a-fA-F]+|\w+);)?),
      $1 ? '&'.$2 : $2 ? '&amp;'.$2 : '&',egos;
  }

  if( $stash->{encode_lt} ){
    # Encode < if succeeded by chars that look like an html tag.
    # Leave email addresses (<foo@bar.com>) for Markdown to process.
    s,$_escape{lt_re}((?=$EMAIL_MARKER)|(?:[a-z/?\$!])?),
      $1 ? '<'.$2 : $2 ?  '&lt;'.$2 : '<',egos;
  }

  return $_;
}


## Parsing ##

sub handle_text {
  my $self  = $_[0];
  my $stash = $self->_private;
  local $_  = $_[1];

  # Unless we're in a code span, verbatim block, or formatted region.
  unless( $stash->{no_escape} ){

    # We could, in theory, alter what gets escaped according to context
    # (for example, escape square brackets (but not parens) inside link text).
    # The markdown produced might look slightly nicer but either way you're
    # at the whim of the markdown processor to interpret things correctly.
    # For now just escape everything.

    # Don't let literal characters be interpreted as markdown.
    $_ = $self->_escape_inline_markdown($_);

    # Entity-encode (or escape for later processing) necessary/desired chars.
    $_ = $self->_encode_or_escape_entities($_);

  }
  # If this _is_ a code section, do limited/specific handling.
  else {
    # Always escaping these chars ensures that we won't mangle the text
    # in the unlikely event that a sequence matching our escape occurred in the
    # input stream (since we're going to escape it and then unescape it).
    s/&/$_escape{amp_code}/gos if $stash->{encode_amp};
    s/</$_escape{lt_code}/gos  if $stash->{encode_lt};
  }

  $self->_save($_);
}

sub start_Document {
  my ($self) = @_;
  $self->_new_stack;
}

sub   end_Document {
  my ($self) = @_;
  $self->_check_search_header;
  my $end = pop @{ $self->_private->{stacks} };

  @{ $self->_private->{stacks} } == 0
    or die 'Document ended with stacks remaining';

  my @doc = $self->_chomp_all(join('', @$end)) . $/;

  if( $self->include_meta_tags ){
    unshift @doc, $self->_build_markdown_head, ($/ x 2);
  }

  if( my $encoding = $self->_get_output_encoding ){
    # Do the check outside the loop(s) for efficiency.
    my $ents = $HAS_HTML_ENTITIES ? \&__entity_encode_ord_he : \&__entity_encode_ord_basic;
    # Iterate indices to avoid copying large strings.
    for my $i ( 0 .. $#doc ){
      print { $self->{output_fh} } Encode::encode($encoding, $doc[$i], $ents);
    }
  }
  else {
    print { $self->{output_fh} } @doc;
  }
}

sub _get_output_encoding {
  my ($self) = @_;

  # If 'match_encoding' is set we need to return an encoding.
  # If pod has no =encoding, Pod::Simple will guess if it sees a high-bit char.
  # If there are no high-bit chars, encoding is undef.
  # Use detected_encoding() rather than encoding() because if Pod::Simple
  # can't use whatever encoding was specified, we probably can't either.
  # Fallback to 'o_e' if no match is found.  This gives the user the choice,
  # since otherwise there would be no reason to specify 'o_e' *and* 'm_e'.
  # Fallback to UTF-8 since it is a reasonable default these days.

  return $self->detected_encoding || $self->output_encoding || 'UTF-8'
    if $self->match_encoding;

  # If output encoding wasn't specified, return false.
  return $self->output_encoding;
}

## Blocks ##

sub start_Verbatim {
  my ($self) = @_;
  $self->_new_stack;
  $self->_private->{no_escape} = 1;
}

sub end_Verbatim {
  my ($self) = @_;

  my $text = $self->_pop_stack_text;

  $text = $self->_indent_verbatim($text);

  $self->_private->{no_escape} = 0;

  # Verbatim blocks do not generate a separate "Para" event.
  $self->_save_block($text);
}

sub _indent_verbatim {
  my ($self, $paragraph) = @_;

    # NOTE: Pod::Simple expands the tabs for us (as suggested by perlpodspec).
    # Pod::Simple also has a 'strip_verbatim_indent' attribute
    # but it doesn't sound like it gains us anything over this method.

    # POD verbatim can start with any number of spaces (or tabs)
    # markdown should be 4 spaces (or a tab)
    # so indent any paragraphs so that all lines start with at least 4 spaces
    my @lines = split /\n/, $paragraph;
    my $indent = ' ' x 4;
    foreach my $line ( @lines ){
        next unless $line =~ m/^( +)/;
        # find the smallest indentation
        $indent = $1 if length($1) < length($indent);
    }
    if( (my $smallest = length($indent)) < 4 ){
        # invert to get what needs to be prepended
        $indent = ' ' x (4 - $smallest);

        # Prepend indent to each line.
        # We could check /\S/ to only indent non-blank lines,
        # but it's backward compatible to respect the whitespace.
        # Additionally, both pod and markdown say they ignore blank lines
        # so it shouldn't hurt to leave them in.
        $paragraph = join "\n", map { length($_) ? $indent . $_ : '' } @lines;
    }

  return $paragraph;
}

sub start_Para {
  $_[0]->_new_stack;
}

sub   end_Para {
  my ($self) = @_;
  my $text = $self->_pop_stack_text;

  $text = $self->_escape_paragraph_markdown($text);

  $self->_save_block($text);
}


## Headings ##

sub start_head1 { $_[0]->_start_head(1) }
sub   end_head1 { $_[0]->_end_head(1) }
sub start_head2 { $_[0]->_start_head(2) }
sub   end_head2 { $_[0]->_end_head(2) }
sub start_head3 { $_[0]->_start_head(3) }
sub   end_head3 { $_[0]->_end_head(3) }
sub start_head4 { $_[0]->_start_head(4) }
sub   end_head4 { $_[0]->_end_head(4) }

sub _check_search_header {
  my ($self) = @_;
  # Save the text since the last heading if we want it for metadata.
  if( my $last = $self->_private->{search_header} ){
    for( $self->_private->{$last} = $self->_last_string ){
      s/\A\s+//;
      s/\s+\z//;
    }
  }
}
sub _start_head {
  my ($self) = @_;
  $self->_check_search_header;
  $self->_new_stack;
}

sub   _end_head {
  my ($self, $num) = @_;
  my $h = '#' x $num;

  my $text = $self->_pop_stack_text;
  $self->_private->{search_header} =
      $text =~ /NAME/   ? 'Title'
    : $text =~ /AUTHOR/ ? 'Author'
    : undef;

  # TODO: option for $h suffix
  # TODO: put a name="" if $self->{embed_anchor_tags}; ?
  # https://rt.cpan.org/Ticket/Display.html?id=57776
  $self->_save_block(join(' ', $h, $text));
}

## Lists ##

# With Pod::Simple->parse_empty_lists(1) there could be an over_empty event,
# but what would you do with that?

sub _start_list {
  my ($self) = @_;
  $self->_new_stack;

  # Nest again b/c start_item will pop this to look for preceding content.
  $self->_increase_indent;
  $self->_new_stack;
}

sub   _end_list {
  my ($self) = @_;
  $self->_handle_between_item_content;

  # Finish the list.

  # All the child elements should be blocks,
  # but don't end with a double newline.
  my $text = $self->_chomp_all($self->_pop_stack_text);

  $_[0]->_save_line($text . $/);
}

sub _handle_between_item_content {
  my ($self) = @_;

  # This might be empty (if the list item had no additional content).
  if( my $text = $self->_pop_stack_text ){
    # Else it's a sub-document.
    # If there are blocks we need to separate with blank lines.
    if( $self->_private->{last_state}->{blocks} ){
      $text = $/ . $text;
    }
    # If not, we can condense the text.
    # In this module's history there was a patch contributed to specifically
    # produce "huddled" lists so we'll try to maintain that functionality.
    else {
      $text = $self->_chomp_all($text) . $/;
    }
    $self->_save($text)
  }

  $self->_decrease_indent;
}

sub _start_item {
  my ($self) = @_;
  $self->_handle_between_item_content;
  $self->_new_stack;
}

sub   _end_item {
  my ($self, $marker) = @_;
  my $text = $self->_pop_stack_text;
  $self->_save_line($self->_indent($marker .
    # Add a space only if there is text after the marker.
    (defined($text) && length($text) ? ' ' . $text : '')
  ));

  # Store any possible contents in a new stack (like a sub-document).
  $self->_increase_indent;
  $self->_new_stack;
}

sub start_over_bullet { $_[0]->_start_list }
sub   end_over_bullet { $_[0]->_end_list }

sub start_item_bullet { $_[0]->_start_item }
sub   end_item_bullet { $_[0]->_end_item('-') }

sub start_over_number { $_[0]->_start_list }
sub   end_over_number { $_[0]->_end_list }

sub start_item_number {
  $_[0]->_start_item;
  # It seems like this should be a stack,
  # but from testing it appears that the corresponding 'end' event
  # comes right after the text (it doesn't surround any embedded content).
  # See t/nested.t which shows start-item, text, end-item, para, start-item....
  $_[0]->_private->{item_number} = $_[1]->{number};
}

sub   end_item_number {
  my ($self) = @_;
  $self->_end_item($self->_private->{item_number} . '.');
}

# Markdown doesn't support definition lists
# so do regular (unordered) lists with indented paragraphs.
sub start_over_text { $_[0]->_start_list }
sub   end_over_text { $_[0]->_end_list }

sub start_item_text { $_[0]->_start_item }
sub   end_item_text { $_[0]->_end_item('-')}


# perlpodspec equates an over/back region with no items to a blockquote.
sub start_over_block {
  # NOTE: We don't actually need to indent for a blockquote.
  $_[0]->_new_stack;
}

sub   end_over_block {
  my ($self) = @_;

  # Chomp first to avoid prefixing a blank line with a `>`.
  my $text = $self->_chomp_all($self->_pop_stack_text);

  # NOTE: Paragraphs will already be escaped.

  # I don't really like either of these implementations
  # but the join/map/split seems a little better and benches a little faster.
  # You would lose the last newline but we've already chomped.
  #$text =~ s{^(.)?}{'>' . (defined($1) && length($1) ? (' ' . $1) : '')}mge;
  $text = join $/, map { length($_) ? '> ' . $_ : '>' } split qr-$/-, $text;

  $self->_save_block($text);
}

## Custom Formats ##

sub start_for {
  my ($self, $attr) = @_;
  $self->_new_stack;

  if( $attr->{target} eq 'html' ){
    # Use another stack so we can indent
    # (not syntactily necessary but seems appropriate).
    $self->_new_stack;
    $self->_increase_indent;
    $self->_private->{no_escape} = 1;
    # Mark this so we know to undo it.
    $self->_stack_state->{for_html} = 1;
  }
}

sub end_for {
  my ($self) = @_;
  # Data gets saved as a block (which will handle indents),
  # but if there was html we'll alter this, so chomp and save a block again.
  my $text = $self->_chomp_all($self->_pop_stack_text);

  if( $self->_private->{last_state}->{for_html} ){
    $self->_private->{no_escape} = 0;
    # Save it to the next stack up so we can pop it again (we made two stacks).
    $self->_save($text);
    $self->_decrease_indent;
    $text = join "\n", '<div>', $self->_chomp_all($self->_pop_stack_text), '</div>';
  }

  $self->_save_block($text);
}

# Data events will be emitted for any formatted regions that have been enabled
# (by default, `markdown` and `html`).

sub start_Data {
  my ($self) = @_;
  # TODO: limit this to what's in attr?
  $self->_private->{no_escape}++;
  $self->_new_stack;
}

sub   end_Data {
  my ($self) = @_;
  my $text = $self->_pop_stack_text;
  $self->_private->{no_escape}--;
  $self->_save_block($text);
}

## Codes ##

sub start_B { $_[0]->_save('**') }
sub   end_B { $_[0]->start_B()   }

sub start_I { $_[0]->_save('_') }
sub   end_I { $_[0]->start_I()  }

sub start_C {
  my ($self) = @_;
  $self->_new_stack;
  $self->_private->{no_escape}++;
}

sub   end_C {
  my ($self) = @_;
  $self->_private->{no_escape}--;
  $self->_save( $self->_wrap_code_span($self->_pop_stack_text) );
}

# Use code spans for F<>.
sub start_F { shift->start_C(@_); }
sub   end_F { shift  ->end_C(@_); }

sub start_L {
  my ($self, $flags) = @_;
  $self->_new_stack;
  push @{ $self->_private->{link} }, $flags;
}

sub   end_L {
  my ($self) = @_;
  my $flags = pop @{ $self->_private->{link} }
    or die 'Invalid state: link end with no link start';

  my ($type, $to, $section) = @{$flags}{qw( type to section )};

  my $url = (
    $type eq 'url' ? $to
      : $type eq 'man' ? $self->format_man_url($to, $section)
      : $type eq 'pod' ? $self->format_perldoc_url($to, $section)
      :                  undef
  );

  my $text = $self->_pop_stack_text;

  # NOTE: I don't think the perlpodspec says what to do with L<|blah>
  # but it seems like a blank link text just doesn't make sense
  if( !length($text) ){
    $text =
      $section ?
        $to ? sprintf('"%s" in %s', $section, $to)
        : ('"' . $section . '"')
      : $to;
  }

  # FIXME: What does Pod::Simple::X?HTML do for this?
  # if we don't know how to handle the url just print the pod back out
  if (!$url) {
    $self->_save(sprintf 'L<%s>', $flags->{raw});
    return;
  }

  # In the url we need to escape quotes and parentheses lest markdown
  # break the url (cut it short and/or wrongfully interpret a title).

  # Backslash escapes do not work for the space and quotes.
  # URL-encoding the space is not sufficient
  # (the quotes confuse some parsers and produce invalid html).
  # I've arbitratily chosen HTML encoding to hide them from markdown
  # while mangling the url as litle as possible.
  $url =~ s/([ '"])/sprintf '&#x%x;', ord($1)/ge;

  # We also need to double any backslashes that may be present
  # (lest they be swallowed up) and stop parens from breaking the url.
  $url =~ s/([\\()])/\\$1/g;

  # TODO: put section name in title if not the same as $text
  $self->_save('[' . $text . '](' . $url . ')');
}

sub start_X {
  $_[0]->_new_stack;
}

sub   end_X {
  my ($self) = @_;
  my $text = $self->_pop_stack_text;
  # TODO: mangle $text?
  # TODO: put <a name="$text"> if configured
}

# A code span can be delimited by multiple backticks (and a space)
# similar to pod codes (C<< code >>), so ensure we use a big enough
# delimiter to not have it broken by embedded backticks.
sub _wrap_code_span {
  my ($self, $arg) = @_;
  my $longest = 0;
  while( $arg =~ /([`]+)/g ){
    my $len = length($1);
    $longest = $len if $longest < $len;
  }
  my $delim = '`' x ($longest + 1);
  my $pad = $longest > 0 ? ' ' : '';
  return $delim . $pad . $arg . $pad . $delim;
}

## Link Formatting (TODO: Move this to another module) ##


sub format_man_url {
    my ($self, $to) = @_;
    my ($page, $part) = ($to =~ /^ ([^(]+) (?: \( (\S+) \) )? /x);
    return $self->man_url_prefix . ($part || 1) . '/' . ($page || $to);
}


sub format_perldoc_url {
  my ($self, $name, $section) = @_;

  my $url_prefix = defined($name)
    && $self->is_local_module($name)
    && $self->local_module_url_prefix
    || $self->perldoc_url_prefix;

  my $url = '';

  # If the link is to another module (external link).
  if ($name) {
    $url = $url_prefix . $name;
  }

  # See https://rt.cpan.org/Ticket/Display.html?id=57776
  # for a discussion on the need to mangle the section.
  if ($section){

    my $method = $url
      # If we already have a prefix on the url it's external.
      ? $self->perldoc_fragment_format
      # Else an internal link points to this markdown doc.
      : $self->markdown_fragment_format;

    $method = 'format_fragment_' . $method
      unless ref($method);

    {
      # Set topic to enable code refs to be simple.
      local $_ = $section;
      $section = $self->$method($section);
    }

    $url .= '#' . $section;
  }

  return $url;
}


# TODO: simple, pandoc, etc?

sub format_fragment_markdown {
  my ($self, $section) = @_;

  # If this is an internal link (to another section in this doc)
  # we can't be sure what the heading id's will look like
  # (it depends on what is rendering the markdown to html)
  # but we can try to follow popular conventions.

  # http://johnmacfarlane.net/pandoc/demo/example9/pandocs-markdown.html#header-identifiers-in-html-latex-and-context
  #$section =~ s/(?![-_.])[[:punct:]]//g;
  #$section =~ s/\s+/-/g;
  $section =~ s/\W+/-/g;
  $section =~ s/-+$//;
  $section =~ s/^-+//;
  $section = lc $section;
  #$section =~ s/^[^a-z]+//;
  $section ||= 'section';

  return $section;
}


{
  # From Pod::Simple::XHTML 3.28.
  # The strings gets passed through encode_entities() before idify().
  # If we don't do it here the substitutions below won't operate consistently.

  sub format_fragment_pod_simple_xhtml {
    my ($self, $t) = @_;

    # encode_entities {
      # We need to use the defaults in case html_encode_chars has been customized
      # (since the purpose is to match what external sources are doing).

      local $self->_private->{html_encode_chars};
      $t = $self->encode_entities($t);
    # }

    # idify {
      for ($t) {
          s/<[^>]+>//g;            # Strip HTML.
          s/&[^;]+;//g;            # Strip entities.
          s/^\s+//; s/\s+$//;      # Strip white space.
          s/^([^a-zA-Z]+)$/pod$1/; # Prepend "pod" if no valid chars.
          s/^[^a-zA-Z]+//;         # First char must be a letter.
          s/[^-a-zA-Z0-9_:.]+/-/g; # All other chars must be valid.
          s/[-:.]+$//;             # Strip trailing punctuation.
      }
    # }

    return $t;
  }
}


sub format_fragment_pod_simple_html {
  my ($self, $section) = @_;

  # From Pod::Simple::HTML 3.28.

  # section_name_tidy {
    $section =~ s/^\s+//;
    $section =~ s/\s+$//;
    $section =~ tr/ /_/;
    $section =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65); # drop crazy characters

    #$section = $self->unicode_escape_url($section);
      # unicode_escape_url {
      $section =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
        #  Turn char 1234 into "(1234)"
      # }

    $section = '_' unless length $section;
    return $section;
  # }
}


sub format_fragment_metacpan { shift->format_fragment_pod_simple_xhtml(@_); }
sub format_fragment_sco      { shift->format_fragment_pod_simple_html(@_);  }


sub is_local_module {
  my ($self, $name) = @_;

  return ($name =~ $self->local_module_re);
}

1;

__END__

#line 1669