Dataframe formats


Type:	Programming
Rating:	2 (easy-medium)
Description:	This example demonstrates dataframe-style representation of data. This functionality is provided for CSV format files via the 'diskdata' I/O driver and subroutine of module mmetc, for the spreadsheet I/O drivers 'xls', 'xlsx', and 'csv' of module mmsheet, and for databases accessed through an ODBC connection via SQL commands. dataframecsv.mos: reading and writing dataframes for CSV format data via 'diskdata' dataframedb.mos: dataframe-style reading of database tables via 'SQLdataframe' dataframesht.mos: reading and writing dataframes for spreadsheets and CSV format data via mmsheet
File(s):	dataframecsv.mos, dataframedb.mos, dataframesht.mos
Data file(s):	dframedata.csv, dframedata.xls, dframedata.xlsx

dataframecsv.mos

(!******************************************************
   Mosel Example Problems
   ======================

   file dataframecsv.mos
   `````````````````````
   Dataframe functionality for reading and writing CSV files
   via 'diskdata' (I/O driver and subroutine)
   
  (c) 2021 Fair Isaac Corporation
      author: S.Heipcke, Oct 2021, rev. Dec. 2022
*******************************************************!) 
model 'dataframecsv'
 uses 'mmsystem', 'mmetc'

 public declarations
   data=`
C_e,C_d,C_c,C_b,C_a,C_s
1,,"3",true,5.5,"r 1"
6,7,"8",,10.5,"r 2"
`
   datanh=`
1,,"3",true,5.5,"r 1"
6,7,"8",,10.5,"r 2"
`
   datacsv=`
C_e,C_d,C_c,C_b,C_a,C_s
1,,"3",true,5.5,r 1
6,7,"8",,10.5,"r "" 2"
`
   datasepfmt=`
C_e;C_d;C_c;C_b;C_a;C_s
1;;"3";true;5,5;"r 1"
6;7;"8";;10,5;"r 2"
`
 end-declarations

 CSVFILE:="text:data"
 CSVFILENH:="text:datanh"
 CSVFMTFILE:="text:datacsv"
 CSVSEPFILE:="text:datasepfmt"

!*** I/O driver version populating row counter and column (field names) indices
 declarations
   dfd: dynamic array(r:range, s:set of string) of text
   dff: array(rf:range, sf:set of string) of text
   dfa: dynamic array(ra:range, sa:set of string) of any
 end-declarations
 initialisations from 'mmetc.diskdata:'
   dfd as "dataframe;skiph;"+CSVFILE
   dff as "dataframe;skiph;"+CSVFILE
   dfa as "dataframe;skiph;"+CSVFILE
 end-initialisations
 writeln("dyn:", dfd.size, " dns:", dff.size) ! Output displayed: dyn:10 dns:12
 writeln(dfa)
 initialisations to 'mmetc.diskdata:'
   dfd as "dataframe;skiph;res.csv"           ! Output CSV file with header line
   dfd as "dataframe;resnh.csv"               ! Output CSV file without header
 end-initialisations

!*** Subroutine version populating row counter and column (field names) indices
 declarations
   dfd1: dynamic array(r1:range, s1:set of string) of text
   dff1: array(r1f:range, s1f:set of string) of text
   dfa1: dynamic array(ra1:range, sa1:set of string) of any
 end-declarations
 diskdata(ETC_DATAFRAME+ETC_SKIPH, CSVFILE, dfd1)
 diskdata(ETC_DATAFRAME+ETC_SKIPH, CSVFILE, dff1)
 diskdata(ETC_DATAFRAME+ETC_SKIPH, CSVFILE, dfa1)
 writeln("dyn:", dfd.size, " dns:", dff.size) ! Output displayed: dyn:10 dns:12
 writeln(dfa1)
! Output CSV file with and without header line
 diskdata(ETC_SKIPH+ETC_OUT+ETC_CSV+ETC_DATAFRAME, "res1.csv", dfd1)
 diskdata(ETC_OUT+ETC_CSV+ETC_DATAFRAME, "resnh1.csv", dfd1)

!*** No header line
 declarations
   dfd2: dynamic array(r21:range, r2:range) of text
 end-declarations
 initialisations from 'mmetc.diskdata:'
   dfd2 as "dataframe;"+CSVFILENH
 end-initialisations
! Same as:
! diskdata(ETC_DATAFRAME, CSVFILENH, dfd2)
 writeln("size df2=", dfd2.size)            ! Output displayed: size df2=10
 writeln("r1=", r21, " r2=", r2)            ! Output displayed: r1=1..2 r2=1..6

!*** Selection of fields (I/O driver version only)
 declarations
   dfd3: dynamic array(r3:range,s3:set of string) of text
   dfa3: dynamic array(range,set of string) of any
 end-declarations
 initialisations from 'mmetc.diskdata:'
   dfd3 as "dataframe;skiph;csv(C_d,C_b);"+CSVFILE
   dfa3 as "dataframe;skiph;cols(C_d,C_b);"+CSVFILE
! Same as:
!   dfa3 as "dataframe;skiph;typedcsv(C_d,C_b);"+CSVFILE
!   dfa3 as "dataframe;skiph;cols(#2,#4);"+CSVFILE
 end-initialisations
 writeln("size df3=", dfd3.size)            ! Output displayed: size df3=2
 writeln("is integer:", dfa3(2,'C_d') is integer, 
         ", is boolean:", dfa3(1,'C_b') is boolean)

!*** Fixed number of rows
 declarations
   dfd4: dynamic array(1..1,s4:set of string) of text
 end-declarations
 initialisations from 'mmetc.diskdata:'
   dfd4 as "dataframe;skiph;"+CSVFILE
 end-initialisations
! Same as:
! diskdata(ETC_DATAFRAME+ETC_SKIPH, CSVFILE, df4)
 writeln("size df4=", dfd4.size)           ! Output displayed: size df4=5

!*** Typed (integer) array
 declarations
   dfi: dynamic array(ri:range,si:set of string) of integer
 end-declarations
! Reading data of other types into an integer array will result in an
! I/O error: instead of stopping we continue to display what has been read
 setparam("ioctrl",true)
  initialisations from 'mmetc.diskdata:'
   dfi as "dataframe;skiph;"+CSVFILE
 end-initialisations
! Same as:
! diskdata(ETC_DATAFRAME+ETC_SKIPH, CSVFILE, dfi)
 setparam("ioctrl",false)
 if getparam("iostatus")<>0 then
   writeln("Error reading input file, interrupted in line ", ri.size)
 end-if
! Display what we have managed to read
 writeln(dfi, " size dfi=", dfi.size)      ! Ouput displayed: size dfi=4

!*** Alternative separator characters (I/O driver version only)
 declarations
   dfd5: dynamic array(range, set of string) of text
   dff5: array(range, set of string) of text
   dfa5: dynamic array(range, set of string) of any
 end-declarations
 initialisations from 'mmetc.diskdata:'
   dfd5 as "dataframe;skiph;dsep=,;fsep=;;"+CSVSEPFILE
   dff5 as "dataframe;skiph;dsep=,;fsep=;;"+CSVSEPFILE
   dfa5 as "dataframe;skiph;dsep=,;fsep=;;"+CSVSEPFILE
 end-initialisations
 writeln("dyn:", dfd5.size, " dns:", dff5.size) ! Output displayed: dyn:10 dns:12
 writeln(dfa5)
 initialisations to 'mmetc.diskdata:'
   dfa5 as "dataframe;skiph;csv;dsep=,;fsep=;;ressep.csv"  ! Output CSV-format file with header line
   dfa5 as "dataframe;dsep=,;fsep=;;resnhsep.csv"  ! Output to file without header
 end-initialisations

!*** Reading file with CSV-style string formating
 declarations
   dfd6: dynamic array(range, set of string) of text
   dff6: array(range, set of string) of any
   dfa6: dynamic array(range, set of string) of any
 end-declarations
 initialisations from 'mmetc.diskdata:'
   dfd6 as "dataframe;skiph;csv;"+CSVFMTFILE       ! Untyped (all read as text)
   dff6 as "dataframe;skiph;csv;"+CSVFMTFILE       ! Untyped 
   dfa6 as "dataframe;skiph;typedcsv;"+CSVFMTFILE  ! Typed
 end-initialisations
! Same as:
! diskdata(ETC_DATAFRAME+ETC_SKIPH+ETC_TYPEDCSV, CSVFMTFILE, dfa6)
 writeln("dyn:", dfd6.size, " dns:", dff6.size) ! Output displayed: dyn:10 dns:12
 writeln("is string:", dff6(1,'C_e') is string, 
         ", is string:", dff6(1,'C_b') is string,
	 ", string:", dff6(2,'C_s')='r " 2')
 writeln("is integer:", dfa6(1,'C_e') is integer, 
         ", is boolean:", dfa6(1,'C_b') is boolean,
	 ", string:", dfa6(1,'C_s')="r 1")

end-model

dataframedb.mos

(!******************************************************
   Mosel Example Problems
   ======================

   file dataframedb.mos
   ````````````````````
   Working with SQL dataframe functionality
   
  (c) 2021 Fair Isaac Corporation
      author: S.Heipcke, Oct 2021, rev. Apr. 2024
*******************************************************!) 
model 'dataframedb'
options keepassert
uses 'mmodbc','mmsystem'

! **** Database creation ****
procedure createDB(cstr:string)
 declarations
   REC=1..7
   article,owner: array(REC) of integer
   dealer,style,color: array(REC) of string
   price: array(REC) of real
 end-declarations
 
 article::[1,1,2,3,3,3,4]
 dealer::['A','B','A','B','C','D','D']
 price::[3.45,3.99,10.99,1.45,1.69,1.25,19.95]
 style::['polo','dress','t-shirt','dress', 'polo','dress','t-shirt']
 color::['blue','white','blue',   'orange','red', 'blue', 'white']
 owner::[1,1,1,2,2,2,2]

 writeln("Creating tables for `sdkdatafrm'")
 SQLconnect(cstr)

 SQLexecute("drop table if exists shop")
 SQLexecute("create table shop (article integer, owner varchar(10))")
 assert(getparam("SQLsuccess"))
 forall(i in REC) do
   SQLexecute("insert into shop (article,owner) values (?,?)",
       [article(i),dealer(i)] )
   assert(getparam("SQLsuccess"))
 end-do

 SQLexecute("drop table if exists shirts")
 SQLexecute("create table shirts (id integer, style varchar(10),"+
       "price float, color varchar(10), owner integer)")
 assert(getparam("SQLsuccess"))

 SQLexecute("insert into shirts (id,style,price,color,owner) values (?,?,?,?,?)",
       [style,price,color,owner])
 assert(getparam("SQLsuccess"))

 SQLexecute("drop table if exists dfoutput")
 SQLexecute("create table dfoutput (article integer, color varchar(10), owner varchar(10), shop integer, flag boolean, price float)")
 assert(getparam("SQLsuccess"))

 SQLdisconnect
end-procedure

! ************ Reading all or selected fields from database tables ************
procedure readtests
 declarations
   dfa:array(ra:range,csa:set of string) of any
   dft:array(rt:range,cst:set of string) of text
   dfi:array(ri:range,csi:set of string) of integer
   dfa2:array(ra2:range,csa2:set of string) of any
 end-declarations

 writeln("+ All columns (any)")
 SQLdataframe("select * from shirts", dfa)
 assert(getparam("SQLsuccess"))
 writeln("csa: ", csa," ra: ", ra)
 writeln("dfa: ", dfa)          ! Typed data
 writeln(sum(i in ra) dfa(i,"owner").integer)
 writeln

! Select some columns and specify new name for one of them
 writeln("+ Index set initialised and column selection (text)")
 SQLdataframe("select id as prod,color as couleur,style from shirts", dft)
 assert(getparam("SQLsuccess"))
 writeln("cst: ", cst," rt: ", rt)
 writeln("dft: ", dft)          ! All data as text
 writeln

! A typed array is accepted, but only fields with matching types are read
 writeln("+ As integer")
 SQLdataframe("select * from shirts", dfi)
 assert(getparam("SQLsuccess"))
 writeln("csi: ", csi," ri: ", ri)
 writeln("dfi: ", dfi)          ! Typed data
 writeln(sum(i in ri,j in csi) dfi(i,j))
 writeln

! Can handle data from multiple tables, in the case of multiple occurrences
! of field names unique names are generated
 writeln("+ Duplicate names in columns")
 SQLdataframe("select * from shirts inner join shop on id=article", dfa2)
 assert(getparam("SQLsuccess"))
 writeln("csa2: ", csa2," ra2: ", ra2)
 writeln("dfa2: ", dfa2) 
 writeln
end-procedure

! ************ Using SQLupdate with a dataframe structure ************

procedure testupdate
 declarations
   dfa:array(ra:range,csa:set of string) of text or any
   dft:array(rt:range,cst:set of string) of text
   Idx: text
 end-declarations

! Reading union type dataframe structure
 SQLdataframe("select * from shirts", dfa)
 assert(getparam("SQLsuccess"))
 writeln("Original df: ", dfa)
 writeln
 forall(i in ra) dfa(i,"price"):=dfa(i,"price").real+10
 forall(i in ra) dfa(i,"style"):=dfa(i,"style").text+"_xyz"
 writeln("Modified df: ", dfa)

 k:=0
 forall(j in csa-{'id'}, k as counter) Idx+=(if(k>1, ", "+j, j)+"=?"+(k+1))
 query:="update shirts set "+Idx+" where id=?1"
 writeln(query)

 finalise(csa)                    ! Required for 'SQLndxcol=false'
 setparam("SQLndxcol", false)     ! Partially indexed data
 SQLexecute(query, dfa)
 assert(getparam("SQLsuccess"))

! Dislay new table contents
 SQLdataframe("select * from shirts",dft)
 assert(getparam("SQLsuccess"))
 writeln("dft: ", dft)
end-procedure

! ************ Using SQL insert with a dynamic dataframe structure ************

 public declarations
   data=`
article,color,owner,shop,flag,price
1,"red","abc",2,,3 
4,"blue",,,true,5.5
6,,"abc",7,false,8.9
`
 end-declarations

procedure writetest
 declarations
   dfa,dfa2:dynamic array(ra:range,csa:set of string) of any
   fields,query: text
 end-declarations

! Populate dataframe with input data
 initialisations from 'mmetc.diskdata:'
   dfa as "dataframe;skiph;text:data"
 end-initialisations

 finalize(csa)                   ! Required for 'SQLndxcol=false'
 setparam("SQLndxcol", true)     ! Partially indexed data
 writeln("Initial data size: ", dfa.size, " dfa:", dfa)
! SQLexecute("insert into dfoutput (article,color,owner,shop,flag,price) values (?,?,?,?,?,?)", dfa)
 fields:=jointext(dfa.index(2))
 query:=jointext(sum(i in dfa.index(2))['?'])
 SQLexecute("insert into dfoutput ("+fields+") values ("+query+")", dfa)
 assert(getparam("SQLsuccess"))
 setparam("SQLndxcol", true)     ! Partially indexed data

! Display new table contents
 SQLdataframe("select * from dfoutput", dfa2)
 assert(getparam("SQLsuccess"))
 writeln("Output data size: ", dfa2.size, " dfa2:", dfa2)
 
end-procedure


!**********************************************************

! Create the database
 createDB("dbtest.sqlite")
 setparam("SQLdebug",true)
 setparam("SQLverbose",true)
 SQLconnect("dbtest.sqlite")
 assert(getparam("SQLsuccess"))

! Run data reading and writing procedures
 readtests
 testupdate
 writetest

 SQLdisconnect

end-model

dataframesht.mos

(!******************************************************
   Mosel Example Problems
   ======================

   file dataframesht.mos
   `````````````````````
   Dataframe-style reading for spreadsheet files in XLXS or
   CSV format
   
  (c) 2021 Fair Isaac Corporation
      author: S.Heipcke, Oct 2021, rev. Feb. 2024
*******************************************************!) 
model "dataframesht"
 uses "mmsheet", "mmreflect", "mmsystem"

!**** Initialize some global data used by reading and writing routines
 declarations
   ATYPE: array(integer) of string
   Ar: dynamic array(range,string) of any
   A2: dynamic array(range,range) of any
   public data=`
C_e,C_d,C_c,C_b,C_a,C_s
1,,"3",true,5.5,"r 1"
6,7,"8",,10.5,"r 2"
`
   public data2=`
C_e,C_d,C_c,C_b,C_a,C_s
1,,'3',true,5.5,r 1
6,7,'8',,10.5,r 2
`
   public datanh=`
1,,'3',true,5.5,r 1
6,7,'8',,10.5,r 2
`
 end-declarations
 ATYPE(integer.id):="integer"; ATYPE(real.id):="real";
 ATYPE(string.id):="string"; ATYPE(boolean.id):="boolean"

 initialisations from 'mmsheet.csv:mmsystem.text:data'
   Ar as 'dataframe;skiph;[]'
 end-initialisations
 writeln("Ar=", Ar)

 initialisations from 'mmsheet.csv:mmsystem.text:datanh'
   A2 as 'dataframe;[]'
 end-initialisations
 writeln("A2=", A2, " size=", A2.size, A2.index(2))

!**********************************************************

! Reading dataframe input data into text format or union-type arrays 
 procedure readdf(drvfile: string)
   declarations
     FieldsA: set of string
     TabinfoA: array(RA:range,FieldsA) of any
!    TabinfoA: dynamic array(RA:range,FieldsA) of any
     TabinfoA2,TabinfoA3: array(range,string) of any
     TabinfoT: array(RT:range,FieldsT:set of string) of text
     TabinfoT2: array(range,range) of text
     TabinfoR: array(range,string) of real
   end-declarations

 !**** Treating all data as text
   initialisations from drvfile
     TabinfoT as 'dataframe;skiph;[]'
     TabinfoT2 as 'dataframe;[B:D]'       ! Title row included as data
   end-initialisations

   writeln("Fields:", FieldsT)
   writeln("Text format data with skiph:", TabinfoT) 
   writeln("Text format data no skiph:", TabinfoT2)
   writeln("Rows: ", RT, " ", TabinfoT2.index(1))

 ! Parse contents to obtain numerical values:
   writeln("Sum of NumFields: ", 
     sum(i in RT) parsereal(TabinfoT(i,'NumField'),1))

 !**** Reading suitable data into a typed array
   initialisations from drvfile
     TabinfoR as 'dataframe;skiph;[](NumField,IntField)'
   end-initialisations

   writeln("Fields:", TabinfoR.index(2))
   writeln("Number format data:", TabinfoR)


 !**** Reading typed data (union type); selection of fields by order number
 ! Partially prepopulated set: index set is getting completed
   FieldsA:={'test','NumField'}

   initializations from drvfile
      TabinfoA as "dataframe;skiph;[](#1,#2,#6,#3,#4)" 
   end-initializations

   writeln("Fields:", FieldsA)
   writeln("Typed data:", TabinfoA)

 ! Accessing specific elements:
   writeln("entry (2,'IntField'): ", TabinfoA(2,"IntField"), 
     " has type ", ATYPE(TabinfoA(2,'IntField').typeid) )
   writeln("entry (3,'NumField'): ", TabinfoA(3,'NumField'), 
     " has type ", ATYPE(TabinfoA(3,'NumField').typeid) )
   writeln("entry (1,'BoolField'): ", TabinfoA(1,'BoolField'), 
     " has type ", ATYPE(TabinfoA(1,'BoolField').typeid) )
   writeln("entry (1,'DateField'): ", TabinfoA(1,'DateField'), 
     " has type ", ATYPE(TabinfoA(1,'DateField').typeid) )

 ! Use contents as numerical values:
   forall(i in RA) writeln(i, "  ",TabinfoA(i,'NumField').real)
   writeln("Sum of NumFields: ", sum(i in RA) TabinfoA(i,'NumField').real)

 !**** Unnamed index sets 
   initializations from drvfile
  ! Selection of fields by name
     TabinfoA2 as "dataframe;skiph;[](StrField,DateField,BoolField)" 
  ! Column range specification; selection of fields by order number
     TabinfoA3 as "dataframe;skiph;[B:E](#1,#4)" 
   end-initializations

   writeln("A2: Fields:", TabinfoA2.index(2))
   writeln("Typed data:", TabinfoA2)

   writeln("A3: Fields:", TabinfoA3.index(2))
   writeln("Typed data:", TabinfoA3)
 end-procedure

!**********************************************************
 
! This routine creates 3 new files (after deleting any already existing versions)
! of the specified type each time it is invoked
 procedure writedf(ftype: string)
  ! Cleaning up any existing files
   fdelete("dframeout."+ftype)
   fdelete("dframeout2."+ftype)
   fdelete("dframeout3."+ftype)
   fdelete("dframeout4."+ftype)

  ! Writing out field names with the array (option skiph+)
   fname:=formattext("mmsheet.%s:dframeout.%1$s",ftype)
   initialisations to fname
     Ar as 'dataframe;skiph+;[A:F]'
   end-initialisations

  ! Pre-populated re-ordered field names in the first N columns
  ! Writing the array without field names (option skiph)
   fname:=formattext("mmsheet.%s:dframeout2.%1$s",ftype)
   L:=["C_s","C_e","C_d","C_c","C_b","C_a"]      
   initialisations to fname
     L as '[A:F]'
     Ar as 'dataframe;skiph;[]'
   end-initialisations 

  ! Pre-populated re-ordered field names in some column range
  ! Selection of fields to be written out
   fname:=formattext("mmsheet.%s:dframeout3.%1$s",ftype)
   initialisations to fname
     L as '[R2C3:R2C8]'
     evaluation of array(i in Ar.index(1),j in ["C_e","C_s"]) 
       Ar(i,j) as 'dataframe;skiph;grow;[R2C3:R2C8](C_e,C_s)'
   end-initialisations !)

  ! Writing out a range without any header line 
  ! (Only possible if second index set is of type 'range')
   fname:=formattext("mmsheet.%s:dframeout4.%1$s",ftype)
   initialisations to fname
     A2 as 'dataframe;[A:F]'  
   end-initialisations
 end-procedure

!**********************************************************

 writeln("****CSV")
 readdf("mmsheet.csv:dframedata.csv")
 writedf("csv")

 writeln("****XLSX")
 readdf("mmsheet.xlsx:dframedata.xlsx")
 writedf("xlsx")

 writeln("****XLS")
 readdf("mmsheet.xls:dframedata.xls")
 writedf("xls")

end-model

© 2001-2024 Fair Isaac Corporation. All rights reserved. This documentation is the property of Fair Isaac Corporation (“FICO”). Receipt or possession of this documentation does not convey rights to disclose, reproduce, make derivative works, use, or allow others to use it except solely for internal evaluation purposes to determine whether to purchase a license to the software described in this documentation, or as otherwise set forth in a written software license agreement between you and FICO (or a FICO affiliate). Use of this documentation and the software described in it must conform strictly to the foregoing permitted uses, and no other use is permitted.

Contents

Index

Glossary

Search Results

Dataframe formats