Initializing help system before first use

Dataframe formats


Type: Programming
Rating: 2 (easy-medium)
Description:

This example demonstrates dataframe-style representation of data. This functionality is provided for CSV format files via the 'diskdata' I/O driver and subroutine of module mmetc and for databases accessed through an ODBC connection via SQL commands.

  • dataframecsv.mos: reading and writing dataframes for CSV format data
  • dataframedb.mos: dataframe-style reading of database tables via 'SQLdataframe'
File(s): dataframecsv.mos, dataframedb.mos


dataframecsv.mos
(!******************************************************
   Mosel Example Problems
   ======================

   file dataframecsv.mos
   `````````````````````
   Dataframe functionality for reading and writing CSV files
   via 'diskdata' (I/O driver and subroutine)
   
  (c) 2021 Fair Isaac Corporation
      author: S.Heipcke, Oct 2021, rev. Dec. 2022
*******************************************************!) 
model 'dataframecsv'
 uses 'mmsystem', 'mmetc'

 public declarations
   data=`
C_e,C_d,C_c,C_b,C_a,C_s
1,,"3",true,5.5,"r 1"
6,7,"8",,10.5,"r 2"
`
   datanh=`
1,,"3",true,5.5,"r 1"
6,7,"8",,10.5,"r 2"
`
   datacsv=`
C_e,C_d,C_c,C_b,C_a,C_s
1,,"3",true,5.5,r 1
6,7,"8",,10.5,"r "" 2"
`
   datasepfmt=`
C_e;C_d;C_c;C_b;C_a;C_s
1;;"3";true;5,5;"r 1"
6;7;"8";;10,5;"r 2"
`
 end-declarations

 CSVFILE:="text:data"
 CSVFILENH:="text:datanh"
 CSVFMTFILE:="text:datacsv"
 CSVSEPFILE:="text:datasepfmt"

!*** I/O driver version populating row counter and column (field names) indices
 declarations
   dfd: dynamic array(r:range, s:set of string) of text
   dff: array(rf:range, sf:set of string) of text
   dfa: dynamic array(ra:range, sa:set of string) of any
 end-declarations
 initialisations from 'mmetc.diskdata:'
   dfd as "dataframe;skiph;"+CSVFILE
   dff as "dataframe;skiph;"+CSVFILE
   dfa as "dataframe;skiph;"+CSVFILE
 end-initialisations
 writeln("dyn:", dfd.size, " dns:", dff.size) ! Output displayed: dyn:10 dns:12
 writeln(dfa)
 initialisations to 'mmetc.diskdata:'
   dfd as "dataframe;skiph;res.csv"           ! Output CSV file with header line
   dfd as "dataframe;resnh.csv"               ! Output CSV file without header
 end-initialisations

!*** Subroutine version populating row counter and column (field names) indices
 declarations
   dfd1: dynamic array(r1:range, s1:set of string) of text
   dff1: array(r1f:range, s1f:set of string) of text
   dfa1: dynamic array(ra1:range, sa1:set of string) of any
 end-declarations
 diskdata(ETC_DATAFRAME+ETC_SKIPH, CSVFILE, dfd1)
 diskdata(ETC_DATAFRAME+ETC_SKIPH, CSVFILE, dff1)
 diskdata(ETC_DATAFRAME+ETC_SKIPH, CSVFILE, dfa1)
 writeln("dyn:", dfd.size, " dns:", dff.size) ! Output displayed: dyn:10 dns:12
 writeln(dfa1)
! Output CSV file with and without header line
 diskdata(ETC_SKIPH+ETC_OUT+ETC_CSV+ETC_DATAFRAME, "res1.csv", dfd1)
 diskdata(ETC_OUT+ETC_CSV+ETC_DATAFRAME, "resnh1.csv", dfd1)

!*** No header line
 declarations
   dfd2: dynamic array(r21:range, r2:range) of text
 end-declarations
 initialisations from 'mmetc.diskdata:'
   dfd2 as "dataframe;"+CSVFILENH
 end-initialisations
! Same as:
! diskdata(ETC_DATAFRAME, CSVFILENH, dfd2)
 writeln("size df2=", dfd2.size)            ! Output displayed: size df2=10
 writeln("r1=", r21, " r2=", r2)            ! Output displayed: r1=1..2 r2=1..6

!*** Selection of fields (I/O driver version only)
 declarations
   dfd3: dynamic array(r3:range,s3:set of string) of text
   dfa3: dynamic array(range,set of string) of any
 end-declarations
 initialisations from 'mmetc.diskdata:'
   dfd3 as "dataframe;skiph;csv(C_d,C_b);"+CSVFILE
   dfa3 as "dataframe;skiph;cols(C_d,C_b);"+CSVFILE
! Same as:
!   dfa3 as "dataframe;skiph;typedcsv(C_d,C_b);"+CSVFILE
!   dfa3 as "dataframe;skiph;cols(#2,#4);"+CSVFILE
 end-initialisations
 writeln("size df3=", dfd3.size)            ! Output displayed: size df3=2
 writeln("is integer:", dfa3(2,'C_d') is integer, 
         ", is boolean:", dfa3(1,'C_b') is boolean)

!*** Fixed number of rows
 declarations
   dfd4: dynamic array(1..1,s4:set of string) of text
 end-declarations
 initialisations from 'mmetc.diskdata:'
   dfd4 as "dataframe;skiph;"+CSVFILE
 end-initialisations
! Same as:
! diskdata(ETC_DATAFRAME+ETC_SKIPH, CSVFILE, df4)
 writeln("size df4=", dfd4.size)           ! Output displayed: size df4=5

!*** Typed (integer) array
 declarations
   dfi: dynamic array(ri:range,si:set of string) of integer
 end-declarations
! Reading data of other types into an integer array will result in an
! I/O error: instead of stopping we continue to display what has been read
 setparam("ioctrl",true)
  initialisations from 'mmetc.diskdata:'
   dfi as "dataframe;skiph;"+CSVFILE
 end-initialisations
! Same as:
! diskdata(ETC_DATAFRAME+ETC_SKIPH, CSVFILE, dfi)
 setparam("ioctrl",false)
 if getparam("iostatus")<>0 then
   writeln("Error reading input file, interrupted in line ", ri.size)
 end-if
! Display what we have managed to read
 writeln(dfi, " size dfi=", dfi.size)      ! Ouput displayed: size dfi=4

!*** Alternative separator characters (I/O driver version only)
 declarations
   dfd5: dynamic array(range, set of string) of text
   dff5: array(range, set of string) of text
   dfa5: dynamic array(range, set of string) of any
 end-declarations
 initialisations from 'mmetc.diskdata:'
   dfd5 as "dataframe;skiph;dsep=,;fsep=;;"+CSVSEPFILE
   dff5 as "dataframe;skiph;dsep=,;fsep=;;"+CSVSEPFILE
   dfa5 as "dataframe;skiph;dsep=,;fsep=;;"+CSVSEPFILE
 end-initialisations
 writeln("dyn:", dfd5.size, " dns:", dff5.size) ! Output displayed: dyn:10 dns:12
 writeln(dfa5)
 initialisations to 'mmetc.diskdata:'
   dfa5 as "dataframe;skiph;csv;dsep=,;fsep=;;ressep.csv"  ! Output CSV-format file with header line
   dfa5 as "dataframe;dsep=,;fsep=;;resnhsep.csv"  ! Output to file without header
 end-initialisations

!*** Reading file with CSV-style string formating
 declarations
   dfd6: dynamic array(range, set of string) of text
   dff6: array(range, set of string) of any
   dfa6: dynamic array(range, set of string) of any
 end-declarations
 initialisations from 'mmetc.diskdata:'
   dfd6 as "dataframe;skiph;csv;"+CSVFMTFILE       ! Untyped (all read as text)
   dff6 as "dataframe;skiph;csv;"+CSVFMTFILE       ! Untyped 
   dfa6 as "dataframe;skiph;typedcsv;"+CSVFMTFILE  ! Typed
 end-initialisations
! Same as:
! diskdata(ETC_DATAFRAME+ETC_SKIPH+ETC_TYPEDCSV, CSVFMTFILE, dfa6)
 writeln("dyn:", dfd6.size, " dns:", dff6.size) ! Output displayed: dyn:10 dns:12
 writeln("is string:", dff6(1,'C_e') is string, 
         ", is string:", dff6(1,'C_b') is string,
	 ", string:", dff6(2,'C_s')='r " 2')
 writeln("is integer:", dfa6(1,'C_e') is integer, 
         ", is boolean:", dfa6(1,'C_b') is boolean,
	 ", string:", dfa6(1,'C_s')="r 1")

end-model

dataframedb.mos
(!******************************************************
   Mosel Example Problems
   ======================

   file dataframedb.mos
   ````````````````````
   Working with SQL dataframe functionality
   
  (c) 2021 Fair Isaac Corporation
      author: S.Heipcke, Oct 2021
*******************************************************!) 
model 'dataframedb'
options keepassert
uses 'mmodbc','mmsystem'

! **** Database creation ****
procedure createDB(cstr:string)
 declarations
  REC=1..7
  article,owner: array(REC) of integer
  dealer,style,color: array(REC) of string
  price: array(REC) of real
 end-declarations
 
 article::[1,1,2,3,3,3,4]
 dealer::['A','B','A','B','C','D','D']
 price::[3.45,3.99,10.99,1.45,1.69,1.25,19.95]
 style::['polo','dress','t-shirt','dress', 'polo','dress','t-shirt']
 color::['blue','white','blue',   'orange','red', 'blue', 'white']
 owner::[1,1,1,2,2,2,2]

 writeln("Creating tables for `sdkdatafrm'")
 SQLconnect(cstr)

 SQLexecute("drop table if exists shop")
 SQLexecute("create table shop (article integer, owner varchar(10))")
 assert(getparam("SQLsuccess"))
 forall(i in REC) do
   SQLexecute("insert into shop (article,owner) values (?,?)",
       [article(i),dealer(i)] )
   assert(getparam("SQLsuccess"))
 end-do

 SQLexecute("drop table if exists shirts")
 SQLexecute("create table shirts (id integer, style varchar(10),"+
       "price float, color varchar(10), owner integer)")
 assert(getparam("SQLsuccess"))

 SQLexecute("insert into shirts (id,style,price,color,owner) values (?,?,?,?,?)",
       [style,price,color,owner])
 assert(getparam("SQLsuccess"))

 SQLdisconnect
end-procedure

! ************ Reading all or selected fields from database tables ************
procedure readtests
 declarations
  dfa:array(ra:range,csa:set of string) of any
  dft:array(rt:range,cst:set of string) of text
  dfi:array(ri:range,csi:set of string) of integer
  dfa2:array(ra2:range,csa2:set of string) of any
 end-declarations

 writeln("+ All columns (any)")
 SQLdataframe("select * from shirts", dfa)
 writeln("csa: ", csa," ra: ", ra)
 writeln("dfa: ", dfa)          ! Typed data
 writeln(sum(i in ra) dfa(i,"owner").integer)
 writeln

! Select some columns and specify new name for one of them
 writeln("+ Index set initialised and column selection (text)")
 SQLdataframe("select id as prod,color as couleur,style from shirts", dft)
 writeln("cst: ", cst," rt: ", rt)
 writeln("dft: ", dft)          ! All data as text
 writeln

! A typed array is accepted, but only fields with matching types are read
 writeln("+ As integer")
 SQLdataframe("select * from shirts", dfi)
 writeln("csi: ", csi," ri: ", ri)
 writeln("dfi: ", dfi)          ! Typed data
 writeln(sum(i in ri,j in csi) dfi(i,j))
 writeln

! Can handle data from multiple tables, in the case of multiple occurrences
! of field names unique names are generated
 writeln("+ Duplicate names in columns")
 SQLdataframe("select * from shirts inner join shop on id=article", dfa2)
 writeln("csa2: ", csa2," ra2: ", ra2)
 writeln("dfa2: ", dfa2) 
 writeln
end-procedure

! ************ Using SQLupdate with a dataframe structure ************

procedure testupdate
 declarations
  dfa:array(ra:range,csa:set of string) of text or any
  dft:array(rt:range,cst:set of string) of text
  Idx: text
 end-declarations

! Reading union type dataframe structure
 SQLdataframe("select * from shirts", dfa)
 writeln("Original df: ", dfa)
 writeln
 forall(i in ra) dfa(i,"price"):=dfa(i,"price").real+10
 forall(i in ra) dfa(i,"style"):=dfa(i,"style").text+"_xyz"
 writeln("Modified df: ", dfa)

 k:=0
 forall(j in csa-{'id'}, k as counter) Idx+=(if(k>1, ", "+j, j)+"=?"+(k+1))
 query:="update shirts set "+Idx+" where id=?1"
 writeln(query)

 finalise(csa)                    ! Required for 'SQLndxcol=false'
 setparam("SQLndxcol", false)     ! Partially indexed data
 SQLexecute(query, dfa)

! Dislay new table contents
 SQLdataframe("select * from shirts",dft)
 writeln("dft: ", dft)
end-procedure

!**********************************************************

! Create the database
 createDB("dbtest.sqlite")
 setparam("SQLdebug",true)
 setparam("SQLverbose",true)
 SQLconnect("dbtest.sqlite")
 assert(getparam("SQLsuccess"))

! Run data reading and writing procedures
 readtests
 testupdate

 SQLdisconnect

end-model

© 2001-2023 Fair Isaac Corporation. All rights reserved. This documentation is the property of Fair Isaac Corporation (“FICO”). Receipt or possession of this documentation does not convey rights to disclose, reproduce, make derivative works, use, or allow others to use it except solely for internal evaluation purposes to determine whether to purchase a license to the software described in this documentation, or as otherwise set forth in a written software license agreement between you and FICO (or a FICO affiliate). Use of this documentation and the software described in it must conform strictly to the foregoing permitted uses, and no other use is permitted.