# coding: utf-8
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/licenses/gpl.html>

   <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.jus.uio.no/sisu/SiSU/download.html>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: plaintext text generation, stripped plaintext output (unix,
     linefeed)

=end
module SiSU_Plaintext
  require "#{SiSU_lib}/dal"
  require "#{SiSU_lib}/sysenv"
  include SiSU_Env
  include SiSU_Param
  include SiSU_Viz
  require "#{SiSU_lib}/plaintext_format"
  include SiSU_Plaintext_format
  require "#{SiSU_lib}/shared_txt"
  require "#{SiSU_lib}/shared_structure"
  pwd=Dir.pwd
  @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0
  @@tablefoot=''
  class Source
    def initialize(opt)
      @opt=opt
      unless @opt.fns =~/(.+?)\.(?:-|ssm\.)?sst$/
       puts "#{sf} not a processed file type"
      end
    end
    # Runs the plaintext conversion for the file described by @opt.
    #
    # Loads the document parameters and environment, prints progress
    # information (which messages appear depends on the letters present in
    # @opt.cmd), reads the prepared document array and passes it to
    # Scroll#songsheet, which produces the output. Any StandardError raised
    # along the way is handed to SiSU_Errors::Info_error for reporting.
    def read
      @md=SiSU_Param::Parameters.new(@opt).get
      @env=SiSU_Env::Info_env.new(@opt.fns)
      output_dir=@env.path.output_tell
      # only the M, V and v command flags get an editor command line in the banner
      editor_hint=(@opt.cmd =~/[MVv]/) ? "#{@env.program.text_editor} #{output_dir}/#{@md.fnb}/#{@md.fn[:plain]}" : ''
      banner=SiSU_Screen::Ansi.new(@opt.cmd,'Plaintext',editor_hint)
      banner.green_hi_blue if @opt.cmd !~/q/
      progress=SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.path.output_tell}/#{@md.fnb}/#{@md.fn[:plain]}")
      progress.flow if @opt.cmd =~/[MV]/
      # result is not used here; the object is still constructed as before
      SiSU_Env::Create_file.new(@opt.cmd,@opt.fns)
      @dal_array=SiSU_DAL::Source.new(@opt).get # dal file drawn here
      SiSU_Plaintext::Source::Scroll.new(@dal_array,@md).songsheet
      SiSU_Env::Info_skin.new(@md).select #watch
    rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
    end
    private
    class Scroll <Source
      require "#{SiSU_lib}/defaults"
      require "#{SiSU_lib}/shared_txt"
      include SiSU_text_utils
      @@endnotes={ :para=>[],:end=>[] }
      @@dp=nil
      # Prepares a Scroll for one document.
      #
      # data:: the prepared document array (iterated paragraph by paragraph
      #        in #markup)
      # md::   document parameters; +md.mod+ is inspected for the
      #        --footnote / --endnote and --dos / --unix modifiers
      #
      # Besides the instance state, this sets the class variable @@endnotes_
      # (false only when --footnote was requested) and caches the digest
      # pattern in @@dp the first time any Scroll is built.
      def initialize(data,md)
        @data=data
        @md=md
        @url_brace=SiSU_Viz::Skin.new.url_decoration
        @vz=SiSU_Env::Get_init.instance.skin
        @@dp ||=SiSU_Env::Info_env.new.digest.pattern
        @dp=@@dp
        @regx=/^(?:(?:#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}\s*)?#{Mx[:lv_o]}\d:(\S*?)#{Mx[:lv_c]})?\s*(.+?)\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m # 2004w18 pb pn removal added
        @tab="\t"
        modifiers=md.mod.inspect
        # notes are gathered at the end of the document unless --footnote is given
        @@endnotes_=(modifiers !~/--footnote/)
        # DOS line endings only on request, unix line feeds otherwise
        @br=(modifiers =~/--dos/) ? "\r\n" : "\n"
        @plaintext={
          :body=>[],
          :open=>[],
          :close=>[],
          :head=>[],
          :metadata=>[],
          :tail=>[]
        }
      end
      # Entry point: converts the stored document data with #markup and
      # hands the result to #publish, returning whatever #publish returns.
      def songsheet
        publish(markup(@data))
      end
      # Extracts the endnotes embedded in a paragraph and appends them, wrapped
      # and labelled, to the class-level @@endnotes store.
      #
      # para:: paragraph text still carrying the endnote open/close markers
      #
      # Notes containing line-break markers are split into separate pieces
      # (empty pieces are dropped). Each piece is line-wrapped; a piece that
      # starts with a note number is rewritten as "[number]: text". The
      # result goes to @@endnotes[:para] (prefixed with "-") and to
      # @@endnotes[:end] (preceded by an empty entry). Returns @@endnotes.
      def extract_endnotes(para='')
        line_break=/#{Mx[:br_line]}|#{Mx[:br_nl]}/
        found=para.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/)
        @n=[]
        found.flatten.each do |captured| #high cost to deal with <br> appropriately within plaintext, consider
          note=captured.dup.to_s
          if note =~line_break
            note.split(line_break).each { |piece| @n << piece unless piece.empty? } #watch #added
          else
            @n << note
          end
        end
        @n.flatten.each do |piece|
          text=piece.to_s
          # pieces already labelled "[n]:" get a deeper indent than the rest
          indent=(text =~/^\[[\d*+]+\]:/) ? 4 : 1
          wrap=SiSU_text_utils::Wrap.new(text,78,indent,1).line_wrap
          if wrap =~ /^\s*[\d*+]+\s+.+?\s*\Z/m
            # leading note number becomes a "[n]: " label; newline terminated
            wrap.gsub!(/^(\s*)([\d*+]+)\s+(.+?)\s*\Z/m,"\\1[\\2]: \\3\n")
          else
            # continuation text: only ensure it is newline terminated
            wrap.gsub!(/^(.+)\Z/m,"\\1\n")
          end
          @@endnotes[:para] << "-#{wrap}"
          @@endnotes[:end].push('',wrap)
        end
        @@endnotes
      end
      # Appends one metadata entry to @plaintext[:metadata].
      #
      # meta:: object responding to +text+, +type+ and +el+
      #
      # When meta.type is 'meta' the entry is a blank line followed by
      # "<tab><el>: <wrapped text>" and a newline; for any other type an empty
      # string is appended. Returns the metadata array.
      def plaintext_metadata(meta)
        wrapped=SiSU_text_utils::Wrap.new(meta.text,78,15,1).line_wrap
        entry=(meta.type == 'meta') ? "\n#{@tab}#{meta.el}: #{wrapped}\n" : ''
        @plaintext[:metadata]=@plaintext[:metadata] << entry
      end
      # Builds the closing section of the plaintext document and appends it to
      # @plaintext[:tail]: links to the other output versions of the document,
      # optional source-control details, and generator / Ruby / timestamp
      # lines. Returns the tail array.
      def plaintext_tail
        SiSU_Env::Info_skin.new(@md).select
        skin=SiSU_Env::Get_init.instance.skin
        # left nil (rendered as an empty string) when no version is recorded
        generated_by=nil
        if @md.sisu_version[:version]
          generated_by="Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})"
        end
        generated_on="Last Generated on: #{Time.now}"
        ruby_info="Ruby version: #{@md.ruby_version}"
        source_info=''
        if @md.sc_info
          source_info="Source file:    #{@md.sc_filename}#{@br}Version number: #{@md.sc_number}#{@br}Version date:   #{@md.sc_date}#{@br}"
        end
        # every document link shares the same <root url>/<file base name> prefix
        doc_url="#{skin.url_root_http}/#{@md.fnb}"
        @plaintext[:tail] << <<WOK
#{@br}
Other versions of this document: #{@br}
manifest:
   #{doc_url}/#{@md.fn[:manifest]}#{@br}
html:
   #{doc_url}/#{@md.fn[:toc]}#{@br}
pdf:
   #{doc_url}/#{@md.fn[:pdf_p]}
   #{doc_url}/#{@md.fn[:pdf_l]}#{@br}
plaintext (plain text):
   #{doc_url}/#{@md.fn[:plain]}#{@br}
at:
   #{skin.url_site}#{@br}

#{source_info}
* #{generated_by}
* #{ruby_info}
* #{generated_on}
* SiSU #{skin.url_sisu}
WOK
      end
      # Wraps one paragraph and appends it to @plaintext[:body], rendering
      # headings in upper case with an underline, then flushes any endnotes
      # collected for the paragraph.
      #
      # para::  paragraph text (expected to match @regx)
      # lv::    heading level as a string or number; nil, '' or 0 means body text
      # ocn::   accepted for the caller's signature, not used here
      # hname:: accepted for the caller's signature, not used here
      #
      # Underline characters: level 1 '*', levels 2-3 '=', level 4 '-',
      # levels 5-6 '.'; the underline is as long as the wrapped text, capped
      # at 78. Always resets @@endnotes[:para] to an empty array, which is
      # also the return value.
      def plaintext_structure(para='',lv='',ocn='',hname='') #% Used to extract the structure of a document
        level=lv.to_i
        level=nil if level == 0
        wrapped=nil
        if para[@regx]
          paragraph=para[@regx,2]
          indent=paragraph[/#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/,1]
          if indent
            # indented paragraph: strip the indent marker, indent two columns per step
            m=indent.to_i
            paragraph.gsub!(/#{Mx[:pa_o]}:i#{m}#{Mx[:pa_c]}/,'')
            wrapped=SiSU_text_utils::Wrap.new(paragraph,78,m*2).line_wrap
          else
            # strip any remaining level markers before wrapping flush left
            unmarked=paragraph.gsub(/#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/,'')
            wrapped=SiSU_text_utils::Wrap.new(unmarked,78,0).line_wrap
          end
        end
        if level
          width=wrapped.length
          width=78 if width > 78
          rule={ 1=>'*',2=>'=',3=>'=',4=>'-',5=>'.',6=>'.' }[level]
          heading=if rule
            wrapped.upcase << @br << rule*width << @br
          end
          @plaintext[:body] << heading
        else
          @plaintext[:body] << wrapped << @br # main text, contents, body KEEP
        end
        if @@endnotes[:para]
          if @@endnotes_
            # notes are printed at the end of the document: just add spacing
            @plaintext[:body] << @br*2
          else
            # footnote mode: print this paragraph's notes directly beneath it
            @plaintext[:body] << @br
            @@endnotes[:para].each { |note| @plaintext[:body] << note << @br }
          end
        end
        @@endnotes[:para]=[]
      end
      # Converts the prepared document into plaintext pieces.
      #
      # data:: enumerable of paragraph strings; most substitutions below use
      #        gsub! and therefore modify those strings in place
      #
      # For each paragraph the internal markup markers (looked up in the Mx
      # table, defined elsewhere) are replaced by plaintext conventions,
      # endnotes are collected through #extract_endnotes, header paragraphs
      # (matching Rx[:meta]) are passed to #plaintext_metadata, and numbered
      # text objects are passed to #plaintext_structure, which appends to
      # @plaintext[:body]. The tail section is generated first by
      # #plaintext_tail. Returns the @plaintext hash.
      def markup(data)                                                       # Used for major markup instructions
        # NOTE(review): dir is not referenced again in this method
        dir=SiSU_Env::Info_env.new(@md.fns)
        @data_mod,@endnotes,@level,@cont,@copen,@plaintext_contents_close=Array.new(6){[]}
        (0..6).each { |x| @cont[x]=@level[x]=false }
        (4..6).each { |x| @plaintext_contents_close[x]='' }
        plaintext_tail #($1,$2)
        table_message='[table omitted, see other document formats]'
        # NOTE(review): fix is not referenced again in this method
        fix=[]
        data.each do |para|
          para.gsub!(/#{Mx[:id_o]}~0;0:0;x\d+#{Mx[:id_c]}/,'') # if book index? remove
          # tables are not rendered: everything from the table marker on is replaced by the notice
          para.gsub!(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+/um,"#@br#{table_message}")
          para.gsub!(/.+?#{Mx[:gl_o]}-##{Mx[:gl_c]}/,'')                                           # remove dummy headings (used by html) #check
          para.gsub!(/#{Mx[:gl_bullet]}\s*/,'* ')                                          # bullet markup, marked down
          # font-face markers are replaced by plaintext delimiters around the marked text
          para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'*\1*')
          para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'/\1/')
          para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'[\1]')
          para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'_\1_')
          para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'^\1^')
          para.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'+\1+')
          para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'"\1"')
          para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'-\1-')
          # links, endnotes and escaped characters are left untouched inside code blocks
          unless para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/
            para.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}((?:https?|file|ftp):\/\/\S+|image)/,'\1 [link:] \2')
            para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3")
            para.gsub!(/_((?:https?|file|ftp):\/\/\S+)/,'\1')
            # collect the note text before the notes are reduced to [^n] markers below
            extract_endnotes(para)
            para.gsub!(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'[^\1]') # endnote marker marked up
            para.gsub!(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'[^\1]') # endnote marker marked up
            # escaped special characters are turned back into the literal characters
            para.gsub!(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<')
            para.gsub!(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>')
            para.gsub!(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&')
            para.gsub!(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!')
            para.gsub!(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#')
            para.gsub!(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*')
            para.gsub!(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-')
            para.gsub!(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/')
            para.gsub!(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_')
            para.gsub!(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{')
            para.gsub!(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}')
            para.gsub!(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~')
            para.gsub!(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©')
          end
          # grouped text (group, verse, alt, code): line breaks become single newlines and
          # the group markers are removed; everywhere else a line break becomes a blank line
          if para =~/#{Mx[:gr_o]}(?:group|verse|alt|code)(?:-end)?#{Mx[:gr_c]}(?:\s+#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/ ##{Mx[:gr_o]}codeline#{Mx[:gr_c]}
            if para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/ #code-block: angle brackets special characters
              # the same substitution is applied twice so that adjacent escapes are both handled
              para.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _> _<
              para.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _<_<
            end
            para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n")                                   # watch
            para.gsub!(/#{Mx[:gr_o]}(?:group|verse|alt|code)(?:-end)?#{Mx[:gr_c]}(?:\s+#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/,'')
          else para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n\n")                                   # watch introduces a bug
          end
          para.gsub!(/#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}/,'')                                         # remove page breaks
          para.gsub!(/^\s*#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/,'') # remove empty lines - check
          # drop the "_" or "\" prefix in front of a url
          para.gsub!(/(^|#{Mx[:gl_c]}|\s)[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3')
          # html anchors are reduced to their link text
          para.gsub!(/<a href=".+?">(.+?)<\/a>/m,'\1')
          para.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'')                                       # remove name links
          para.gsub!(/&nbsp;|#{Mx[:nbsp]}/,' ')                                            # decide on
          # images are replaced by a bracketed file name or by their quoted caption
          para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/,'    [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]")
          para.gsub!(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]')
          #para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]')
          # NOTE(review): wordlist is not referenced again in this method
          wordlist=para.scan(/\S+/)
          if para =~/^#{Rx[:meta]}\s*(.+?)\Z/m # for headers
            d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta
            if d_meta; plaintext_metadata(d_meta)
           end
          end
          # everything that is not a header, end-of-file or endnotes marker
          if para !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/
            if para =~@regx #/.+?<~\d+;\w\d+;\w\d+>.*/ #watch change
              paranum=para[@regx,3]
              @p_num=SiSU_Plaintext_format::Paragraph_number.new(paranum)
            end
            @sto=SiSU_Structure::Split_text_object.new(@md,para).txt
            ### problem in scroll, it appears tables are getting paragraph numbers
            m=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
            # numbered text objects: the heading level (1-6) is read from @sto.format and the
            # text is appended to the body by plaintext_structure; para is then rebound to the
            # value of the case expression, so later changes no longer touch the element of data
            if para =~m \
            and para=~/\S+/
              para=case @sto.format
              when /^(1):(\S*?)/
                plaintext_structure(para,$1,@sto.ocn,$2)
                @sto.lev_para_ocn.heading_body1
              when /^(2):(\S*?)/
                plaintext_structure(para,$1,@sto.ocn,$2)
                @sto.lev_para_ocn.heading_body2
              when /^(3):(\S*?)/
                plaintext_structure(para,$1,@sto.ocn,$2)
                @sto.lev_para_ocn.heading_body3
              when /^(4):(\S+?)/ # work on see SiSU_text_parts::Split_text_object
                plaintext_structure(para,$1,@sto.ocn,$2)
                @sto.lev_para_ocn.heading_body4
              when /^(5):(\S*?)/
                plaintext_structure(para,$1,@sto.ocn,$2)
                @sto.lev_para_ocn.heading_body5
              when /^(6):(\S*?)/
                plaintext_structure(para,$1,@sto.ocn,$2)
                @sto.lev_para_ocn.heading_body6
              else
                plaintext_structure(para,nil,nil,nil) #watch may be problematic
                para
              end
            elsif para =~/#{table_message}/
              @plaintext[:body] << para << @br
            # the remaining branches have no body: matching paragraphs add nothing to the output here
            elsif para =~/(Note|Endnotes?)/ \
            and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
            elsif para =~/(MetaData)/ \
            and para =~/#{Mx[:id_o]}~(\d+);[um]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #debug 2003w46 add rc info ####suspect visit
              #formatMono=MonoSiSU.new('<br /><a name="metadata">MetaData</a>')
              #para=formatMono.bold_para
            elsif para.include? 'Owner Details' \
            and para !~/#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
              #formatMono=MonoSiSU.new('<br /><a name="owner.details">Owner Details</a>')
              #@plaintext[:owner_details]=formatMono.bold_para
              #para=''
            elsif para =~/(#{Mx[:tc_p]}|#{Mx[:gr_o]}Th?)/u #tables ! check
            end
            para='' if (para =~/<a name="n\d+">/ \
            and para =~/^(-\{{2}~\d+|<!e[:_]\d+!>)/) # -endnote
            case para
            when /#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/
              if para =~/.*<:#>.*$/m
                txt_obj={:txt =>para}
                format_text=Format_text_object.new(@md,txt_obj)
                para=format_text.scr_indent_one_no_paranum
              end
            end
            if para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/
              # i don't get the condition for no paranum
            end
            # NOTE(review): these clean-ups act on the local para only; nothing visible in this
            # method stores the cleaned value afterwards - confirm whether they are still needed
            para.gsub!(/#{Mx[:id_o]}.+?#{Mx[:id_c]}/,' ') if para ## Clean Prepared Text
            para.gsub!(/<!.+!>/,' ') if para ## Clean Prepared Text
            para.gsub!(/<:\S+>/,' ') if para ## Clean Prepared Text
          end
        end
        @plaintext
      end
      # Assembles the final document and writes it out through Output.
      #
      # plaintext:: hash of output sections as returned by #markup
      #
      # Order: open, head, body, collected endnotes (only in endnote mode),
      # a 78 character divider, metadata, then - only when @md.stmp contains
      # a word character - a second divider and the :owner_details section,
      # and finally the tail. Resets the class-level endnote store afterwards
      # and returns the fresh, empty store.
      def publish(plaintext)
        rule='='*78
        content=[plaintext[:open],plaintext[:head],plaintext[:body]]
        content.push(@@endnotes[:end]) if @@endnotes_
        content.push("#{@br}#{rule}#{@br}",plaintext[:metadata])
        if @md.stmp =~/\w+/ #not used?
          # NOTE(review): no code visible in this file assigns :owner_details - confirm
          content.push("#@br#{rule}#@br",plaintext[:owner_details])
        end
        content.push(plaintext[:tail])
        Output.new(content,@md).plaintext
        @@endnotes={ :para=>[],:end=>[] }
      end
    end
    class Output <Source
      include SiSU_Param
      include SiSU_Env
      # content:: list of output sections (strings or arrays of lines)
      # md::      document parameters, used to locate the output file
      def initialize(content,md)
        @content=content
        @md=md
      end
      # Writes the assembled content to the document's plaintext output file.
      #
      # Creates the output directory and file through SiSU_Env::SiSU_file,
      # then writes every section of @content: a non-empty array is written
      # line by line with trailing whitespace removed from each line; any
      # other section (a string, an empty array) is written as it is.
      #
      # The file handle is closed even when a write raises, so a failure part
      # way through no longer leaks the handle; the error itself still
      # propagates to the caller. Lines are stripped on a copy, so the strings
      # held by the caller are left unmodified. Returns nil.
      def plaintext                                                            #%plaintext output
        SiSU_Env::SiSU_file.new(@md).mkdir
        file_plaintext=SiSU_Env::SiSU_file.new(@md,@md.fn[:plain]).mkfile
        begin
          @content.each do |para|                                              # this is a hack
            if para.class==Array \
            and para.length > 0
              para.each do |line|
                file_plaintext.puts line.gsub(/\s+$/m,'') #unix plaintext
              end
            else file_plaintext.puts para          #unix plaintext # /^([*=-]|\.){5}/
            end
          end
        ensure
          file_plaintext.close
        end
        nil
      end
    end
  end
end
__END__
&#033;\|&#035;\|&&#042;\|&#045;\|&#047;\|&#095;\|&#123;\|&#125;\|&#126;\|&#
