Text handling and regular expressions


Type:	Programming
Rating:	2 (easy-medium)
Description:	parsectx.mos: Parsing with parser contexts; regex.mos: Regular expression matching and replacement
File(s):	parsectx.mos, regex.mos

parsectx.mos

(!******************************************************
   Mosel User Guide Example Problems
   ================================= 

   file parsectx.mos 
   `````````````````
   Parsing with parser contexts.
 
   (c) 2015 Fair Isaac Corporation
       author: S. Heipcke, Apr 2015
*******************************************************!)
model "parsing context"
 uses "mmsystem"

 declarations
   ! One parser context per demonstration, so each scan keeps its own state
   ctxreal, ctxchecked, ctxint, ctxouter, ctxinner: parsectx
   plainvals, ctxvals, checkedvals: list of real     ! Results of scans 0,1,2
   intvals, intvals2: list of integer                ! Results of scans 3,4
   comma=getchar(",",1)              ! Character code of ","
   src: text                         ! The text that every scan below parses
   fieldtxt: text                    ! Contents of a single field (scan 4)
   realval: real                     ! Most recently parsed real number
   intval: integer                   ! Most recently parsed integer number
 end-declarations

 src:= text(", , 123.4 , 345.6 ,")

! ---- Scan 0: no parser context, settings go through global parameters ----
 setparam("sys_sepchar", comma)      ! Fields are separated by commas
 setparam("sys_trim", true)          ! Blanks around the separator are trimmed
 while (nextfield(src)) do           ! Advance to the next field
   plainvals+= [parsereal(src)]      ! Append the real number read from it
   writeln("Read up to position ", getparam("sys_endparse")) 
 end-do
 writeln("##parse0: ", plainvals)
! Output stated with the original example: [123.4,345.6]


! ---- Scan 1: real numbers, settings stored in a parser context ----
 setsepchar(ctxreal, comma)          ! Fields are separated by commas
 settrim(ctxreal, true)              ! Blanks around the separator are trimmed
 while (nextfield(src,ctxreal)) do   ! Advance to the next field
   ctxvals+= [parsereal(src, ctxreal)]   ! Append the real number read from it
   writeln("Read up to position ", ctxreal.endparse) 
 end-do
 writeln("##parse1: ", ctxvals)
! Output stated with the original example: [123.4,345.6]


! ---- Scan 2: parser context plus explicit error checks ----
 setendparse(ctxchecked, 0)          ! Start at the beginning of text 
 setsepchar(ctxchecked, comma)       ! Fields are separated by commas
 settrim(ctxchecked, true)           ! Blanks around the separator are trimmed
 while (nextfield(src,ctxchecked)) do
   ! A separator at the current position, or a position at/after the end of
   ! the text, is treated as an empty field and recorded as 0
   if getchar(src, ctxchecked.endparse)=comma or ctxchecked.endparse>=src.size then
     checkedvals+=[0.0]
   else
     realval:=parsereal(src, ctxchecked)
     if getsysstat=0 then            ! Status 0: the number was read correctly
       checkedvals+= [realval]
     else                            ! Otherwise report where parsing stopped
       writeln("Malformed field contents at position ", ctxchecked.endparse,
               " (", copytext(src, ctxchecked.endparse,ctxchecked.endparse+2), ")")
     end-if
   end-if
   writeln("Read up to position ", ctxchecked.endparse) 
 end-do
 writeln("##parse2: ", checkedvals)
! Output stated with the original example: [0,0,123.4,345.6,0]


! ---- Scan 3: integer numbers, context configured via dot notation ----
 ctxint.endparse:=0                  ! Start at the beginning of text
 ctxint.sepchar:=comma               ! Fields are separated by commas
 ctxint.trim:=true                   ! Blanks around the separator are trimmed
 while (nextfield(src,ctxint)) do    ! Advance to the next field
   intval:=parseint(src,ctxint)      ! Try to read an integer from the field
   if getsysstat=0 then              ! Keep the value only if parsing succeeded
     intvals+= [intval]
   end-if
   writeln("Read up to position ", ctxint.endparse)
 end-do
 writeln("##parse3: ", intvals)
! Output stated with the original example: [123]
! (nextfield fails after reading an incomplete field)


! ---- Scan 4: two contexts - one walks the fields, one parses each field ----
 setsepchar(ctxouter, comma)         ! Fields are separated by commas
 settrim(ctxouter, true)             ! Blanks around the separator are trimmed
 while (nextfield(src,ctxouter)) do  ! Advance to the next field
   fieldtxt:=parsetext(src, ctxouter)    ! Extract the whole field as text
   ctxinner.endparse:=1              ! Restart at the beginning of the field text
   intval:=parseint(fieldtxt,ctxinner)   ! Read an integer from the field text
   if getsysstat=0 then              ! Keep the value only if parsing succeeded
     intvals2+= [intval]
   end-if
   writeln("Read up to position ", ctxouter.endparse) 
 end-do
 writeln("##parse4: ", intvals2)
! Output stated with the original example: [123,345]

end-model

regex.mos

(!******************************************************
   Mosel User Guide Example Problems
   ================================= 

   file regex.mos 
   ``````````````
   Regular expression matching and replacement.
 
   (c) 2015 Fair Isaac Corporation
       author: S. Heipcke, Apr 2015
*******************************************************!)
model "test regex" 
 uses "mmsystem"

 declarations
  m: array(0..3) of textarea  
        ! m(0): whole matched zone; m(1),m(2),m(3): sub-expression matches
        ! (sized for the three groups used by the patterns below)
  t: text
 end-declarations

!**** Pattern matching ****
 t:="MyValue=10,Sometext mytext MoretextMytext2, MYVAL=1.5 mYtext3"
! Display all strings starting with 'My' (case insensitive), BRE syntax.
! m(0).succ is passed as the search start position; the loop relies on
! regmatch updating m so that the next search resumes after the last match.
 m(0).succ:=1
 while (regmatch(t, '\<My\(\w*\)', m(0).succ, REG_ICASE, m))
  writeln("Word starting with 'My': ", 
   copytext(t,m(0)), " (", copytext(t,m(1)),")")

! Display all strings containing 'My' not at beginning (case insensitive),
! ERE syntax: m(1) = 'My' plus remainder, m(2) = 'My', m(3) = remainder
 m(0).succ:=1
 while (regmatch(t, '\w+((My)(\w*))', m(0).succ, REG_ICASE+REG_EXTENDED, m))
  writeln("String containing 'My' (not at beginning): ", 
   copytext(t,m(0)), " (", copytext(t,m(1)), "=", copytext(t,m(2)) ,
   "+", copytext(t,m(3)), ")")

! Alternative way of stating the same expression, spelling out \w as a
! POSIX character class
 m(0).succ:=1
 while (regmatch(t, '[[:alnum:]_]+((My)([[:alnum:]_]*))', m(0).succ,  
        REG_ICASE+REG_EXTENDED, m))
  writeln("String containing 'My' (not at beginning): ", 
   copytext(t,m(0)), " (", copytext(t,m(1)), "=", copytext(t,m(2)) ,
   "+", copytext(t,m(3)), ")")
 
(!
 \<  beginning of word
 \w or [[:alnum:]_]  alphanumeric or underscore characters
 *  0 or more times
 ()  select the result to be returned as the match, mask with backslash in BRE
 +  1 or more times (only in ERE) 
!)


!**** Replacement of matching expressions ****
! Replace dates of the format yyyy-mm-dd by the format dd/mm/yyyy
! (\1, \2, \3 in the replacement refer to the three parenthesized groups)
 t:="date1=20/11/2010,date2=1-Oct-2013,date3=2014-6-30"
 numr:= regreplace(t, '([[:digit:]]{4})-([01]?[[:digit:]])-([0-3]?[[:digit:]])',
              '\3/\2/\1', 1, REG_EXTENDED) 
 if numr>0 then
  writeln(numr, " replacements: ", t)
 end-if 

! The same using BRE syntax (no REG_EXTENDED flag, groups and counts masked
! with backslashes):
 t:="date1=20/11/2010,date2=1-Oct-2013,date3=2014-6-30"
 writeln( regreplace(t, '\(\d\{4\}\)-\([01]\{0,1\}\d\)-\([0-3]\{0,1\}\d\)',
         '\3/\2/\1' ),  " replacements: ", t)

! The same more readable (ERE syntax).
! t is reset first: the BRE call above has already rewritten the date, so
! without the reset this call would find nothing left to replace. The result
! is printed so that the ERE variant is actually demonstrated.
 t:="date1=20/11/2010,date2=1-Oct-2013,date3=2014-6-30"
 numr:= regreplace(t, '(\d{4})-([01]{0,1}\d)-([0-3]{0,1}\d)',
         '\3/\2/\1', 1, REG_EXTENDED ) 
 writeln(numr, " replacements: ", t)

(!
 \d or [:digit:]  numerical character
 ?  0 times or once (ERE only)
 {M,N}  minimum M and maximum N match count
 []  set of possible character matches
!) 

end-model

© 2001-2021 Fair Isaac Corporation. All rights reserved. This documentation is the property of Fair Isaac Corporation (“FICO”). Receipt or possession of this documentation does not convey rights to disclose, reproduce, make derivative works, use, or allow others to use it except solely for internal evaluation purposes to determine whether to purchase a license to the software described in this documentation, or as otherwise set forth in a written software license agreement between you and FICO (or a FICO affiliate). Use of this documentation and the software described in it must conform strictly to the foregoing permitted uses, and no other use is permitted.

Contents

Index

Glossary

Search Results

Text handling and regular expressions