Dataframe formats
|
|
|
| Type: | Programming |
| Rating: | 2 (easy-medium) |
| Description: | This example demonstrates dataframe-style representation of data. This functionality is provided for CSV format files via the 'diskdata' I/O driver and subroutine of module mmetc, for the spreadsheet I/O drivers 'xls', 'xlsx', and 'csv' of module mmsheet, and for databases accessed through an ODBC connection via SQL commands.
|
| File(s): | dataframecsv.mos, dataframedb.mos, dataframesht.mos |
| Data file(s): | dframedata.csv, dframedata.xls, dframedata.xlsx |
|
|
|
| dataframecsv.mos |
(!******************************************************
Mosel Example Problems
======================
file dataframecsv.mos
`````````````````````
Dataframe functionality for reading and writing CSV files
via 'diskdata' (I/O driver and subroutine)
(c) 2021 Fair Isaac Corporation
author: S.Heipcke, Oct 2021, rev. Dec. 2022
*******************************************************!)
model 'dataframecsv'
uses 'mmsystem', 'mmetc'
public declarations
data=`
C_e,C_d,C_c,C_b,C_a,C_s
1,,"3",true,5.5,"r 1"
6,7,"8",,10.5,"r 2"
`
datanh=`
1,,"3",true,5.5,"r 1"
6,7,"8",,10.5,"r 2"
`
datacsv=`
C_e,C_d,C_c,C_b,C_a,C_s
1,,"3",true,5.5,r 1
6,7,"8",,10.5,"r "" 2"
`
datasepfmt=`
C_e;C_d;C_c;C_b;C_a;C_s
1;;"3";true;5,5;"r 1"
6;7;"8";;10,5;"r 2"
`
end-declarations
CSVFILE:="text:data"
CSVFILENH:="text:datanh"
CSVFMTFILE:="text:datacsv"
CSVSEPFILE:="text:datasepfmt"
!*** I/O driver version populating row counter and column (field names) indices
declarations
dfd: dynamic array(r:range, s:set of string) of text
dff: array(rf:range, sf:set of string) of text
dfa: dynamic array(ra:range, sa:set of string) of any
end-declarations
initialisations from 'mmetc.diskdata:'
dfd as "dataframe;skiph;"+CSVFILE
dff as "dataframe;skiph;"+CSVFILE
dfa as "dataframe;skiph;"+CSVFILE
end-initialisations
writeln("dyn:", dfd.size, " dns:", dff.size) ! Output displayed: dyn:10 dns:12
writeln(dfa)
initialisations to 'mmetc.diskdata:'
dfd as "dataframe;skiph;res.csv" ! Output CSV file with header line
dfd as "dataframe;resnh.csv" ! Output CSV file without header
end-initialisations
!*** Subroutine version populating row counter and column (field names) indices
declarations
dfd1: dynamic array(r1:range, s1:set of string) of text
dff1: array(r1f:range, s1f:set of string) of text
dfa1: dynamic array(ra1:range, sa1:set of string) of any
end-declarations
diskdata(ETC_DATAFRAME+ETC_SKIPH, CSVFILE, dfd1)
diskdata(ETC_DATAFRAME+ETC_SKIPH, CSVFILE, dff1)
diskdata(ETC_DATAFRAME+ETC_SKIPH, CSVFILE, dfa1)
writeln("dyn:", dfd.size, " dns:", dff.size) ! Output displayed: dyn:10 dns:12
writeln(dfa1)
! Output CSV file with and without header line
diskdata(ETC_SKIPH+ETC_OUT+ETC_CSV+ETC_DATAFRAME, "res1.csv", dfd1)
diskdata(ETC_OUT+ETC_CSV+ETC_DATAFRAME, "resnh1.csv", dfd1)
!*** No header line
declarations
dfd2: dynamic array(r21:range, r2:range) of text
end-declarations
initialisations from 'mmetc.diskdata:'
dfd2 as "dataframe;"+CSVFILENH
end-initialisations
! Same as:
! diskdata(ETC_DATAFRAME, CSVFILENH, dfd2)
writeln("size df2=", dfd2.size) ! Output displayed: size df2=10
writeln("r1=", r21, " r2=", r2) ! Output displayed: r1=1..2 r2=1..6
!*** Selection of fields (I/O driver version only)
declarations
dfd3: dynamic array(r3:range,s3:set of string) of text
dfa3: dynamic array(range,set of string) of any
end-declarations
initialisations from 'mmetc.diskdata:'
dfd3 as "dataframe;skiph;csv(C_d,C_b);"+CSVFILE
dfa3 as "dataframe;skiph;cols(C_d,C_b);"+CSVFILE
! Same as:
! dfa3 as "dataframe;skiph;typedcsv(C_d,C_b);"+CSVFILE
! dfa3 as "dataframe;skiph;cols(#2,#4);"+CSVFILE
end-initialisations
writeln("size df3=", dfd3.size) ! Output displayed: size df3=2
writeln("is integer:", dfa3(2,'C_d') is integer,
", is boolean:", dfa3(1,'C_b') is boolean)
!*** Fixed number of rows
declarations
dfd4: dynamic array(1..1,s4:set of string) of text
end-declarations
initialisations from 'mmetc.diskdata:'
dfd4 as "dataframe;skiph;"+CSVFILE
end-initialisations
! Same as:
! diskdata(ETC_DATAFRAME+ETC_SKIPH, CSVFILE, df4)
writeln("size df4=", dfd4.size) ! Output displayed: size df4=5
!*** Typed (integer) array
declarations
dfi: dynamic array(ri:range,si:set of string) of integer
end-declarations
! Reading data of other types into an integer array will result in an
! I/O error: instead of stopping we continue to display what has been read
setparam("ioctrl",true)
initialisations from 'mmetc.diskdata:'
dfi as "dataframe;skiph;"+CSVFILE
end-initialisations
! Same as:
! diskdata(ETC_DATAFRAME+ETC_SKIPH, CSVFILE, dfi)
setparam("ioctrl",false)
if getparam("iostatus")<>0 then
writeln("Error reading input file, interrupted in line ", ri.size)
end-if
! Display what we have managed to read
writeln(dfi, " size dfi=", dfi.size) ! Ouput displayed: size dfi=4
!*** Alternative separator characters (I/O driver version only)
declarations
dfd5: dynamic array(range, set of string) of text
dff5: array(range, set of string) of text
dfa5: dynamic array(range, set of string) of any
end-declarations
initialisations from 'mmetc.diskdata:'
dfd5 as "dataframe;skiph;dsep=,;fsep=;;"+CSVSEPFILE
dff5 as "dataframe;skiph;dsep=,;fsep=;;"+CSVSEPFILE
dfa5 as "dataframe;skiph;dsep=,;fsep=;;"+CSVSEPFILE
end-initialisations
writeln("dyn:", dfd5.size, " dns:", dff5.size) ! Output displayed: dyn:10 dns:12
writeln(dfa5)
initialisations to 'mmetc.diskdata:'
dfa5 as "dataframe;skiph;csv;dsep=,;fsep=;;ressep.csv" ! Output CSV-format file with header line
dfa5 as "dataframe;dsep=,;fsep=;;resnhsep.csv" ! Output to file without header
end-initialisations
!*** Reading file with CSV-style string formating
declarations
dfd6: dynamic array(range, set of string) of text
dff6: array(range, set of string) of any
dfa6: dynamic array(range, set of string) of any
end-declarations
initialisations from 'mmetc.diskdata:'
dfd6 as "dataframe;skiph;csv;"+CSVFMTFILE ! Untyped (all read as text)
dff6 as "dataframe;skiph;csv;"+CSVFMTFILE ! Untyped
dfa6 as "dataframe;skiph;typedcsv;"+CSVFMTFILE ! Typed
end-initialisations
! Same as:
! diskdata(ETC_DATAFRAME+ETC_SKIPH+ETC_TYPEDCSV, CSVFMTFILE, dfa6)
writeln("dyn:", dfd6.size, " dns:", dff6.size) ! Output displayed: dyn:10 dns:12
writeln("is string:", dff6(1,'C_e') is string,
", is string:", dff6(1,'C_b') is string,
", string:", dff6(2,'C_s')='r " 2')
writeln("is integer:", dfa6(1,'C_e') is integer,
", is boolean:", dfa6(1,'C_b') is boolean,
", string:", dfa6(1,'C_s')="r 1")
end-model
|
| dataframedb.mos |
(!******************************************************
Mosel Example Problems
======================
file dataframedb.mos
````````````````````
Working with SQL dataframe functionality
(c) 2021 Fair Isaac Corporation
author: S.Heipcke, Oct 2021, rev. Apr. 2024
*******************************************************!)
model 'dataframedb'
options keepassert
uses 'mmodbc','mmsystem'
! **** Database creation ****
procedure createDB(cstr:string)
declarations
REC=1..7
article,owner: array(REC) of integer
dealer,style,color: array(REC) of string
price: array(REC) of real
end-declarations
article::[1,1,2,3,3,3,4]
dealer::['A','B','A','B','C','D','D']
price::[3.45,3.99,10.99,1.45,1.69,1.25,19.95]
style::['polo','dress','t-shirt','dress', 'polo','dress','t-shirt']
color::['blue','white','blue', 'orange','red', 'blue', 'white']
owner::[1,1,1,2,2,2,2]
writeln("Creating tables for `sdkdatafrm'")
SQLconnect(cstr)
SQLexecute("drop table if exists shop")
SQLexecute("create table shop (article integer, owner varchar(10))")
assert(getparam("SQLsuccess"))
forall(i in REC) do
SQLexecute("insert into shop (article,owner) values (?,?)",
[article(i),dealer(i)] )
assert(getparam("SQLsuccess"))
end-do
SQLexecute("drop table if exists shirts")
SQLexecute("create table shirts (id integer, style varchar(10),"+
"price float, color varchar(10), owner integer)")
assert(getparam("SQLsuccess"))
SQLexecute("insert into shirts (id,style,price,color,owner) values (?,?,?,?,?)",
[style,price,color,owner])
assert(getparam("SQLsuccess"))
SQLexecute("drop table if exists dfoutput")
SQLexecute("create table dfoutput (article integer, color varchar(10), owner varchar(10), shop integer, flag boolean, price float)")
assert(getparam("SQLsuccess"))
SQLdisconnect
end-procedure
! ************ Reading all or selected fields from database tables ************
procedure readtests
declarations
dfa:array(ra:range,csa:set of string) of any
dft:array(rt:range,cst:set of string) of text
dfi:array(ri:range,csi:set of string) of integer
dfa2:array(ra2:range,csa2:set of string) of any
end-declarations
writeln("+ All columns (any)")
SQLdataframe("select * from shirts", dfa)
assert(getparam("SQLsuccess"))
writeln("csa: ", csa," ra: ", ra)
writeln("dfa: ", dfa) ! Typed data
writeln(sum(i in ra) dfa(i,"owner").integer)
writeln
! Select some columns and specify new name for one of them
writeln("+ Index set initialised and column selection (text)")
SQLdataframe("select id as prod,color as couleur,style from shirts", dft)
assert(getparam("SQLsuccess"))
writeln("cst: ", cst," rt: ", rt)
writeln("dft: ", dft) ! All data as text
writeln
! A typed array is accepted, but only fields with matching types are read
writeln("+ As integer")
SQLdataframe("select * from shirts", dfi)
assert(getparam("SQLsuccess"))
writeln("csi: ", csi," ri: ", ri)
writeln("dfi: ", dfi) ! Typed data
writeln(sum(i in ri,j in csi) dfi(i,j))
writeln
! Can handle data from multiple tables, in the case of multiple occurrences
! of field names unique names are generated
writeln("+ Duplicate names in columns")
SQLdataframe("select * from shirts inner join shop on id=article", dfa2)
assert(getparam("SQLsuccess"))
writeln("csa2: ", csa2," ra2: ", ra2)
writeln("dfa2: ", dfa2)
writeln
end-procedure
! ************ Using SQLupdate with a dataframe structure ************
procedure testupdate
declarations
dfa:array(ra:range,csa:set of string) of text or any
dft:array(rt:range,cst:set of string) of text
Idx: text
end-declarations
! Reading union type dataframe structure
SQLdataframe("select * from shirts", dfa)
assert(getparam("SQLsuccess"))
writeln("Original df: ", dfa)
writeln
forall(i in ra) dfa(i,"price"):=dfa(i,"price").real+10
forall(i in ra) dfa(i,"style"):=dfa(i,"style").text+"_xyz"
writeln("Modified df: ", dfa)
k:=0
forall(j in csa-{'id'}, k as counter) Idx+=(if(k>1, ", "+j, j)+"=?"+(k+1))
query:="update shirts set "+Idx+" where id=?1"
writeln(query)
finalise(csa) ! Required for 'SQLndxcol=false'
setparam("SQLndxcol", false) ! Partially indexed data
SQLexecute(query, dfa)
assert(getparam("SQLsuccess"))
! Dislay new table contents
SQLdataframe("select * from shirts",dft)
assert(getparam("SQLsuccess"))
writeln("dft: ", dft)
end-procedure
! ************ Using SQL insert with a dynamic dataframe structure ************
public declarations
data=`
article,color,owner,shop,flag,price
1,"red","abc",2,,3
4,"blue",,,true,5.5
6,,"abc",7,false,8.9
`
end-declarations
procedure writetest
declarations
dfa,dfa2:dynamic array(ra:range,csa:set of string) of any
fields,query: text
end-declarations
! Populate dataframe with input data
initialisations from 'mmetc.diskdata:'
dfa as "dataframe;skiph;text:data"
end-initialisations
finalize(csa) ! Required for 'SQLndxcol=false'
setparam("SQLndxcol", true) ! Partially indexed data
writeln("Initial data size: ", dfa.size, " dfa:", dfa)
! SQLexecute("insert into dfoutput (article,color,owner,shop,flag,price) values (?,?,?,?,?,?)", dfa)
fields:=jointext(dfa.index(2))
query:=jointext(sum(i in dfa.index(2))['?'])
SQLexecute("insert into dfoutput ("+fields+") values ("+query+")", dfa)
assert(getparam("SQLsuccess"))
setparam("SQLndxcol", true) ! Partially indexed data
! Display new table contents
SQLdataframe("select * from dfoutput", dfa2)
assert(getparam("SQLsuccess"))
writeln("Output data size: ", dfa2.size, " dfa2:", dfa2)
end-procedure
!**********************************************************
! Create the database
createDB("dbtest.sqlite")
setparam("SQLdebug",true)
setparam("SQLverbose",true)
SQLconnect("dbtest.sqlite")
assert(getparam("SQLsuccess"))
! Run data reading and writing procedures
readtests
testupdate
writetest
SQLdisconnect
end-model
|
| dataframesht.mos |
(!******************************************************
Mosel Example Problems
======================
file dataframesht.mos
`````````````````````
Dataframe-style reading for spreadsheet files in XLXS or
CSV format
(c) 2021 Fair Isaac Corporation
author: S.Heipcke, Oct 2021, rev. Feb. 2024
*******************************************************!)
model "dataframesht"
uses "mmsheet", "mmreflect", "mmsystem"
!**** Initialize some global data used by reading and writing routines
declarations
ATYPE: array(integer) of string
Ar: dynamic array(range,string) of any
A2: dynamic array(range,range) of any
public data=`
C_e,C_d,C_c,C_b,C_a,C_s
1,,"3",true,5.5,"r 1"
6,7,"8",,10.5,"r 2"
`
public data2=`
C_e,C_d,C_c,C_b,C_a,C_s
1,,'3',true,5.5,r 1
6,7,'8',,10.5,r 2
`
public datanh=`
1,,'3',true,5.5,r 1
6,7,'8',,10.5,r 2
`
end-declarations
ATYPE(integer.id):="integer"; ATYPE(real.id):="real";
ATYPE(string.id):="string"; ATYPE(boolean.id):="boolean"
initialisations from 'mmsheet.csv:mmsystem.text:data'
Ar as 'dataframe;skiph;[]'
end-initialisations
writeln("Ar=", Ar)
initialisations from 'mmsheet.csv:mmsystem.text:datanh'
A2 as 'dataframe;[]'
end-initialisations
writeln("A2=", A2, " size=", A2.size, A2.index(2))
!**********************************************************
! Reading dataframe input data into text format or union-type arrays
procedure readdf(drvfile: string)
declarations
FieldsA: set of string
TabinfoA: array(RA:range,FieldsA) of any
! TabinfoA: dynamic array(RA:range,FieldsA) of any
TabinfoA2,TabinfoA3: array(range,string) of any
TabinfoT: array(RT:range,FieldsT:set of string) of text
TabinfoT2: array(range,range) of text
TabinfoR: array(range,string) of real
end-declarations
!**** Treating all data as text
initialisations from drvfile
TabinfoT as 'dataframe;skiph;[]'
TabinfoT2 as 'dataframe;[B:D]' ! Title row included as data
end-initialisations
writeln("Fields:", FieldsT)
writeln("Text format data with skiph:", TabinfoT)
writeln("Text format data no skiph:", TabinfoT2)
writeln("Rows: ", RT, " ", TabinfoT2.index(1))
! Parse contents to obtain numerical values:
writeln("Sum of NumFields: ",
sum(i in RT) parsereal(TabinfoT(i,'NumField'),1))
!**** Reading suitable data into a typed array
initialisations from drvfile
TabinfoR as 'dataframe;skiph;[](NumField,IntField)'
end-initialisations
writeln("Fields:", TabinfoR.index(2))
writeln("Number format data:", TabinfoR)
!**** Reading typed data (union type); selection of fields by order number
! Partially prepopulated set: index set is getting completed
FieldsA:={'test','NumField'}
initializations from drvfile
TabinfoA as "dataframe;skiph;[](#1,#2,#6,#3,#4)"
end-initializations
writeln("Fields:", FieldsA)
writeln("Typed data:", TabinfoA)
! Accessing specific elements:
writeln("entry (2,'IntField'): ", TabinfoA(2,"IntField"),
" has type ", ATYPE(TabinfoA(2,'IntField').typeid) )
writeln("entry (3,'NumField'): ", TabinfoA(3,'NumField'),
" has type ", ATYPE(TabinfoA(3,'NumField').typeid) )
writeln("entry (1,'BoolField'): ", TabinfoA(1,'BoolField'),
" has type ", ATYPE(TabinfoA(1,'BoolField').typeid) )
writeln("entry (1,'DateField'): ", TabinfoA(1,'DateField'),
" has type ", ATYPE(TabinfoA(1,'DateField').typeid) )
! Use contents as numerical values:
forall(i in RA) writeln(i, " ",TabinfoA(i,'NumField').real)
writeln("Sum of NumFields: ", sum(i in RA) TabinfoA(i,'NumField').real)
!**** Unnamed index sets
initializations from drvfile
! Selection of fields by name
TabinfoA2 as "dataframe;skiph;[](StrField,DateField,BoolField)"
! Column range specification; selection of fields by order number
TabinfoA3 as "dataframe;skiph;[B:E](#1,#4)"
end-initializations
writeln("A2: Fields:", TabinfoA2.index(2))
writeln("Typed data:", TabinfoA2)
writeln("A3: Fields:", TabinfoA3.index(2))
writeln("Typed data:", TabinfoA3)
end-procedure
!**********************************************************
! This routine creates 3 new files (after deleting any already existing versions)
! of the specified type each time it is invoked
procedure writedf(ftype: string)
! Cleaning up any existing files
fdelete("dframeout."+ftype)
fdelete("dframeout2."+ftype)
fdelete("dframeout3."+ftype)
fdelete("dframeout4."+ftype)
! Writing out field names with the array (option skiph+)
fname:=formattext("mmsheet.%s:dframeout.%1$s",ftype)
initialisations to fname
Ar as 'dataframe;skiph+;[A:F]'
end-initialisations
! Pre-populated re-ordered field names in the first N columns
! Writing the array without field names (option skiph)
fname:=formattext("mmsheet.%s:dframeout2.%1$s",ftype)
L:=["C_s","C_e","C_d","C_c","C_b","C_a"]
initialisations to fname
L as '[A:F]'
Ar as 'dataframe;skiph;[]'
end-initialisations
! Pre-populated re-ordered field names in some column range
! Selection of fields to be written out
fname:=formattext("mmsheet.%s:dframeout3.%1$s",ftype)
initialisations to fname
L as '[R2C3:R2C8]'
evaluation of array(i in Ar.index(1),j in ["C_e","C_s"])
Ar(i,j) as 'dataframe;skiph;grow;[R2C3:R2C8](C_e,C_s)'
end-initialisations !)
! Writing out a range without any header line
! (Only possible if second index set is of type 'range')
fname:=formattext("mmsheet.%s:dframeout4.%1$s",ftype)
initialisations to fname
A2 as 'dataframe;[A:F]'
end-initialisations
end-procedure
!**********************************************************
writeln("****CSV")
readdf("mmsheet.csv:dframedata.csv")
writedf("csv")
writeln("****XLSX")
readdf("mmsheet.xlsx:dframedata.xlsx")
writedf("xlsx")
writeln("****XLS")
readdf("mmsheet.xls:dframedata.xls")
writedf("xls")
end-model
|
© 2001-2025 Fair Isaac Corporation. All rights reserved. This documentation is the property of Fair Isaac Corporation (“FICO”). Receipt or possession of this documentation does not convey rights to disclose, reproduce, make derivative works, use, or allow others to use it except solely for internal evaluation purposes to determine whether to purchase a license to the software described in this documentation, or as otherwise set forth in a written software license agreement between you and FICO (or a FICO affiliate). Use of this documentation and the software described in it must conform strictly to the foregoing permitted uses, and no other use is permitted.
