*=*=*=*= ToTag.html =*=*=*=*

Integer Function ToTag


      Integer Function ToTag(length,word,tag)
      implicit integer(a-z)
      parameter (mxnoun=1000,mxlen=50,fnice=1.0)
      common /nouns/ nnoun,len_noun,weight_noun,freq_noun,noun
      character*(mxlen) noun(mxnoun)
      character*(mxlen) clower,clotag
      integer len_noun(mxnoun),weight_noun(mxnoun)
      integer freq_noun(mxnoun)
      character*(*) word,tag
c
      totag = length
      tag = word
      if(length.eq.0) return
      clotag = clower(word,length)
      do 1 i=1,nnoun
         if(len_noun(i).ne.length) goto 1
         if(noun(i)(:length).ne.clotag(:length)) goto 1
c
c if first occurence, make reference, otherwise link
c
         if(weight_noun(i).eq.0) then
            weight_noun(i) = 1
            tag = 'clotag(:length)//
     &            '">'//word(:length)//''
            totag = 15 + 2*length
         else if(weight_noun(i).eq.1) then
c
c link
c
            tag = 'clotag(:length)//
     &            '">'//word(:length)//''
            totag = 16 + 2*length
         else if(weight_noun(i).eq.2) then
c
c external filename
c
            tag = clotag(:length)//
     &            '">'//word(:length)//''
            totag = 15 + 2*length

         endif
         return
    1 continue
      end

*=*=*=*= Txt2HTML.html =*=*=*=*

Program Txt2HTML


      Program Txt2HTML
      implicit integer(a-z)
      parameter (mxlin=250,mxlen=50,mxnoun=1000,mxpar=20000)
      parameter (mxpass=2,mxtag=100)
      parameter (fnice=1.0)
      common /nouns/ nnoun,len_noun,weight_noun,freq_noun,noun
c
      character*(mxtag) tag
      character*(mxlin) cline
      character*(mxlen) noun(mxnoun)
      character*(mxlen) word,lastword,nextword,sfile
      character*(mxpar) para,temp,chtml
      character*16 cpunct /'-:;,/.?!"''(){}[]'/
      character*10 tchar
      character*3 ccthree
      character*1 c,cc,cclast
c
      integer len_noun(mxnoun),weight_noun(mxnoun)
      integer freq_noun(mxnoun)
c
      real f
c
      logical lpunct,lalpha,lnumbr,lclaus
c
      external lenocc,totag
c
c statement functions
c
      lclaus(c) = index(cpunct,c).gt.5
      lalpha(c) = (lge(c,'a').and.lle(c,'z')).or.
     &            (lge(c,'A').and.lle(c,'Z'))
      lnumbr(c) = lge(c,'0').and.lle(c,'9')
c
c      call getarg(1,sfile,status)
c      if(status.le.0) goto 900
c      open(1,file=sfile(:status),status='old',err=900)
      open(1,status='old',readonly,err=900)
c      open(7,file='user',title='Candidate Nouns',iofocus=.true.)
c
      nnoun = 0
      lpar = 0
      pass = 1
c
c Read the file and accumulate the paragraphs
c
    1 read(1,'(a)',end=2,err=2) cline
c
      ll = lenocc(cline)
c
      if(ll.le.0.and.lpar.ne.0) then
c
c empty line ... process words in preceding paragraph
c
         lhtml = 0
         lwordl = 0
         lwordn = 0
         lword = 0
         lastword = ' '
         nextword = ' '
         word = ' '
         mark = 1
         markl = 0
         markn = 0
         ipos = 0
         cc = char(1)
    3    ipos = ipos+1
         cclast = cc
         if(ipos.gt.lpar) goto 4
         cc = para(ipos:ipos)
         if(cc.eq.char(9)) cc = ' '
         good3 = 1
         if(ipos.gt.1.and.ipos.lt.lpar) then
            ccthree=para(ipos-1:ipos+1)
            if(lalpha(cclast).or.lnumbr(cclast)) good3=2
            if(lalpha(para(ipos+1:ipos+1)).or.
     &         lnumbr(para(ipos+1:ipos+1))) good3=good3+1
         endif
c
c add characters to nextword
c
         if(lalpha(cc).or.lnumbr(cc)) then
            if(lwordn.lt.mxlen) then
               lwordn = lwordn+1
               nextword(lwordn:lwordn) = cc
            endif
            goto 3
c
c special for files ... assumes structure like A.B !
c
         else if(cc.eq.'.'.and.good3.eq.3) then
            if(lwordn.lt.mxlen) then
               lwordn = lwordn+1
               nextword(lwordn:lwordn) = cc
            endif
            goto 3
         else if(ccthree(2:3).eq.':\'.and.lalpha(cclast)) then
            if(lwordn.lt.mxlen-1) then
               lwordn = lwordn+2
               nextword(lwordn-1:lwordn) = ':\'
               ipos = ipos+1
            endif
            goto 3
         else if(cc.eq.'\'.and.index(nextword(:lwordn),'\').ne.0) then
            if(lwordn.lt.mxlen) then
               lwordn = lwordn+1
               nextword(lwordn:lwordn) = cc
            endif
            goto 3
         else if(pass.lt.mxpass) then
            markn = 0
            if(lclaus(cc)) markn = 1
c
c end of word.
c
            if(lword*lwordn.ne.0.and.pass.lt.mxpass) then
               call context(lastword(:lwordl),
     &                      word(:lword),
     &                      nextword(:lwordn),markl)
c shift words left
            endif
            if(lwordn.eq.0) goto 3
            lastword = word
            lwordl = lword
            word = nextword
            lword = lwordn
            lwordn = 0
c
c mark signifies whether "word" was preceded by a clause end
c
            markl = mark
            mark = markn
            goto 3
         else if(pass.eq.mxpass) then
c
c Last pass .... pipe out HTML
c
            lastword = word
            lwordl = lword
            word = nextword
            lword = lwordn
            lwordn = 0
            markl = mark
            mark = markn
c
c check for tag
c
            ltag = 0
            if(lword.ne.0) ltag = totag(lword,word,tag)
            temp = chtml
            if(lhtml+ltag+1.ge.70) then
               write(6,*) chtml(:lhtml)
               lhtml = 0
            endif
c
c check for translation chars
c
            tchar(1:1) = cc
            lt = 1
            if(cc.eq.'<') then tchar = '<'
               lt = 5
            else if(cc.eq.'>') then
               tchar = '>'
               lt = 5
            else if(cc.eq.'&') then
               tchar = '&'
               lt = 5
            endif
            if(ltag.eq.0) then
               chtml = temp(:lhtml)//tchar(:lt)
               lhtml = lhtml+lt
            else if(cc.ne.' '.or.cclast.ne.cc) then
               chtml = temp(:lhtml)//tag(:ltag)//tchar(:lt)
               lhtml = lhtml+ltag+lt
            else
               chtml = temp(:lhtml)//tag(:ltag)
               lhtml = lhtml+ltag
            endif
            goto 3
         endif
    4    lpar = 0
         if(lwordl*lword.ne.0.and.pass.lt.mxpass)
     &     call context(lastword(:lwordl),word(:lword),' ',markl)
         if(pass.eq.mxpass.and.ll.eq.0) then
            write(6,*) chtml(:lhtml)
            lhtml = 0
            write(6,*) '

' endif else c c add line to para c temp = para para = temp(:lpar)//' '//cline(:ll) lpar = lpar + ll + 1 endif goto 1 c c End of pass ... c 2 continue if(pass.lt.mxpass) then write(7,*) ' A total of ',nnoun,' candidate nouns at pass ',pass write(7,'(1x,a20,f10.5)') (noun(i)(:len_noun(i)), & real(weight_noun(i))/real(freq_noun(i)),i=1,nnoun) endif c pass = pass+1 c c Prepare final weights for tagging pass c if(pass.eq.mxpass.and.nnoun.gt.0) then inoun = 1 6 if(inoun.le.nnoun) then f = real(weight_noun(inoun))/real(freq_noun(inoun)) weight_noun(inoun) = 0 c c For filenames, set the weight to be 2 c if(index(noun(inoun),'.').ne.0) weight_noun(inoun)=2 if(index(noun(inoun),'\').ne.0) weight_noun(inoun)=2 if(f.lt.fnice) then c c remove from list c write(7,*) ' Rejecting ',noun(inoun),f do i=inoun+1,nnoun noun(i-1) = noun(i) weight_noun(i-1) = weight_noun(i) freq_noun(i-1) = freq_noun(i) len_noun(i-1) = len_noun(i) enddo nnoun = nnoun-1 else inoun = inoun + 1 endif goto 6 endif write(7,*) ' A total of ',nnoun,' filtered nouns' write(7,'(1x,a20,f10.5)') (noun(i)(:len_noun(i)), & real(weight_noun(i))/real(freq_noun(i)),i=1,nnoun) endif c if(pass.le.mxpass) then lpar = 0 rewind(1) goto 1 endif write(6,*) chtml(:lhtml) stop 900 write(6,*) ' Error opening input file' end *=*=*=*= context.html =*=*=*=*

subroutine context


      subroutine context(lastword,word,nextword,mark)
      implicit integer(a-z)
      parameter (mxnoun=1000,mxlen=50)
      common /nouns/ nnoun,len_noun,weight_noun,freq_noun,noun
      character*(mxlen) lowlastword,lowword,lownextword
      character*(mxlen) noun(mxnoun)
      character*(mxlen) clower
      character*(*) lastword,word,nextword
      character*(1) c
      external clower
      logical lcapit
      integer len_noun(mxnoun),weight_noun(mxnoun),freq_noun(mxnoun)
c
      lcapit(c) = lge(c,'A').and.lle(c,'Z')
c
      if (nnoun.ge.mxnoun) return
      if (word.eq.' ') return
      lw = lenocc(word)
      lowword = clower(word,lw)
c
c Get index of this noun, if it exists
c
      ifound = 0
      do 1 i=1,nnoun
         if(noun(i).eq.lowword) then
            ifound = i
            goto 2
         endif
    1 continue
    2 continue
c
c Check the candidate word as a plural
c
      if(lowword(lw:lw).eq.'s'.and.lw.gt.2) then
         weight = 4
         do i=1,nnoun
            if(lowword(:lw-1).eq.noun(i)) then
              weight_noun(i) = weight_noun(i)+1
              freq_noun(i) = freq_noun(i)+1
              goto 100
            endif
         enddo
      endif
      lwl = lenocc(lastword)
      lwn = lenocc(nextword)
      lownextword = clower(nextword,lwn)
      lowlastword = clower(lastword,lwl)
c
c If word contains . or \ then it is a file
c
      weight = 50
      if(index(word(:lw),'.').ne.0.or.index(word(:lw),'\')) goto 100
c
c If not start of clause, and first letter is capital, then noun
c
      weight = 10
      if(mark.eq.0.and.lcapit(word(1:1))) goto 100
c
c If start of clause, and first two letters are capital, then noun
c
      weight = 5
      if(mark.ne.0.and.lw.gt.1.and.lcapit(word(2:2))) goto 100
c
c
c Check if word on right is a noun
c
      weight = 0
      do i=1,nnoun
         if(lownextword.eq.noun(i)) then
            weight = -2
            goto 100
         endif
      enddo
      weight = 2
      if (lowlastword.eq.'the') goto 100
      weight = 1
      if (lowlastword.eq.'a') goto 100
c
c probably not a noun
c
      weight = -1
c
 100  continue
      if(ifound.ne.0) then
         weight_noun(ifound)=weight_noun(ifound)+weight
         freq_noun(ifound)=freq_noun(ifound)+1
      endif
c
c reduce weight on left if that is supposed to be a noun, too
c
      do i=1,nnoun
         if(lowlastword.eq.noun(i)) then
            weight_noun(i) = weight_noun(i)-2
         endif
      end do
      if(weight.le.0.or.ifound.ne.0) return
      nnoun = nnoun+1
      noun(nnoun) = lowword
      len_noun(nnoun) = lw
      weight_noun(nnoun) = weight
      freq_noun(nnoun) = 1
      end

c *=*=*=*= Lenocc.html =*=*=*=*

Integer Function Lenocc


      Integer Function Lenocc(c)
      Character*(*) c
      do lenocc=len(c),1,-1
         if(c(lenocc:lenocc).ne.' ') return
      enddo
      lenocc = 0
      end

*=*=*=*= clower.html =*=*=*=*

character*(*) function clower


      character*(*) function clower(cword,l)
      character*(*) cword
      character*(26) lc /'abcdefghijklmnopqrstuvwxyz'/
      character*(26) uc /'ABCDEFGHIJKLMNOPQRSTUVWXYZ'/
      clower = cword
      do i=1,l
         ip = index(uc,cword(i:i))
         if(ip.ne.0) clower(i:i) = lc(ip:ip)
      enddo

      end