Post Reply 
CSV: Load and Parse CSV files and strings
01-15-2017, 11:16 PM (This post was last modified: 01-16-2017 12:26 AM by StephenG1CMZ.)
Post: #1
CSV: Load and Parse CSV files and strings
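This first version requires every CSV record to contain an index-number field, and requires the separator (the "comma") to be used only as a field delimiter: a comma embedded inside a quoted string is not recognised as such and splits that string into additional fields.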

Stephen Lewkowicz (G1CMZ)
https://my.numworks.com/python/steveg1cmz
Visit this user's website Find all posts by this user
Quote this message in a reply
01-15-2017, 11:19 PM (This post was last modified: 01-16-2017 12:25 AM by StephenG1CMZ.)
Post: #2
RE: CSV: Load and Parse CSV files and strings
Version 0.1

Main routines are:
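CSV_Load(FileLST,OFFSET,XXFIELD,RNSCALE,SHO_PROGRESS), loads the listed files and returns the list of records
CSV_Parse1(ST,POS1,POSN), returns the list of fields of one record
CSV_Process1(ST,POS1,POSN,IIO,NUMFILES,XXFIELD,RNSCALE), parses one record and saves its fields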
(See source for details)

Code:

 
 //FILE-LEVEL VARIABLES SHARED BY THE ROUTINES IN THIS PROGRAM
 LOCAL CSV_ID:="CSV V 0.1 © 2017 StephenG1CMZ";//ID TEXT DRAWN BY CSV_IDS
 LOCAL ST;//CSV_Load STORES THE NAME OF THE FILE BEING LOADED HERE
 LOCAL LF:=CHAR(10);//LINE FEED: THE RECORD TERMINATOR CSV_Load SCANS FOR
 LOCAL COMMA:=",";//FIELD SEPARATOR USED BY CSV_Parse1
 LOCAL CR:=CHAR(13);//CARRIAGE RETURN (NOT REFERENCED ELSEWHERE IN THIS LISTING)
 LOCAL YL:=19;//ONLY REFERENCED FROM COMMENTED-OUT CODE IN THIS LISTING
 LOCAL FL:="G1CMZ_CSV_TEMP";//APP FILE NAME USED BY CSV_SCR_PUT AND CSV_SCR_GET

 LOCAL SHO_PROGRESS:=1;//TEST VALUE:IT IS A PARAMETER AND NOT CUSTOMISED
 //LOCAL EMBEDDED:=0;//TEST VALUE 1=PARSE COMMA WITHIN QUOTES

 LOCAL CSV_DATA:={};//LOADED RECORDS: FILLED BY CSV_Process1, RETURNED BY CSV_Load
 LOCAL FIELDS:={};//FIELDS OF THE RECORD MOST RECENTLY PARSED BY CSV_Parse1

 LOCAL SHOW_RECORDS:=0;//FOR DEBUGGING: NON-ZERO MAKES CSV_Process1 DISPLAY EACH RECORD

 CSV_IDS()
 BEGIN
  TEXTOUT_P(CSV_ID,0,210,1); 
 END;

 CSV_ABOUT()
 //ABOUT SCREEN: CLEARS THE DISPLAY, DRAWS THE ID TEXT, THEN PAUSES
 BEGIN
  RECT_P();//NO ARGUMENTS: PRESUMABLY CLEARS THE WHOLE SCREEN - CONFIRM
  CSV_IDS();
  WAIT; 
 END;

 JUNKA(FS)
 BEGIN
  IF SIZE(FS) THEN //PRECAUTION
   DelAFiles(FS);//Garbage Gone
  END;
 END;
 EXPORT CSV_SCR_GET()
 //RESTORE USER SCREEN
 BEGIN
  G0:=AFiles(FL);
  JUNKA(FL);
 END;
 //GET AND PUT ARE NOT INCLUDED WITHIN LOAD IN ORDER
 //TO IMPROVE FLEXIBILITY AND TIMING. ALWAYS PUT BEFORE GET. 
 EXPORT CSV_SCR_PUT()
 //SAVE USER SCREEN: STORES G0 IN THE APP FILE NAMED BY FL
 //CSV_SCR_GET READS THAT FILE BACK INTO G0 AND DELETES IT
 BEGIN
  AFiles(FL):=G0;
  //YOU CAN NOW RECT_P IF DESIRED TO HIDE USER SCREEN
 END;

 INSTRINGn(ST,TXT)
 //INSTRING RETURNING A LIST OF POSITIONS OF MATCHES
 //PREFIXED BY LST(1)-SIMPLIFIES TAKING STRINGS USING
 //   MID(ST,LST(II-1)+1,LST(II)-LST(II-1)
 BEGIN
  LOCAL LST:={0};
  LOCAL POSN:=1;

  REPEAT
   POSN:=INSTRING(MID(ST,POSN),TXT);
   //MSGBOX({SIZE(LST),POSN});
   IF POSN THEN
    LST(0):=POSN+LST(0);
    POSN:=LST(0)+1;
   END;
  
  UNTIL (POSN==0) OR (POSN≥DIM(ST));
 
  RETURN LST;
 END;

 CSV_SHO_PROGRESS_FILES(SHO_PROGRESS,IIO,NUMFILES,ST)
 BEGIN
   IF SHO_PROGRESS THEN   
     RECT_P(320/2,200,320,220);
     RECT_P(320/2,216,320/2+IP(320/2*IIO/NUMFILES)+10,220,RGB(0,255,0));
     TEXTOUT_P(ST,320/2,200);
     DRAWMENU("","",1,(IIO)+"/"+NUMFILES,"","G1CMZ");//this_shows_files  
     //PRINT();//DEBG
    END;
  END;

 CSV_Parse1(ST,POS1,POSN)
 //Parse 1 record (ST substring)
 //Here is where we handle embedded commas, or dont.
 //POS1 is char after previous comma
 //POSN IS LF (OR LAST CHAR IN FILE)
 //EMBEDDED IGNORED
 BEGIN 
  LOCAL COMMA1,COMMALST,II; 
  LOCAL REC,RECD,RECPOS;
  
  FIELDS:={};
  REC :=MID(ST,POS1,POSN-POS1+1); 
    //MSGBOX(REC);
  COMMALST:=INSTRINGn(REC,COMMA);
  IF SIZE(COMMALST) THEN   
   FOR II FROM  2 TO SIZE(COMMALST) DO
      //RECD:=REPLACE(REC,CHAR(34),"");//CLEAN QUOTES
    FIELDS(0):=MID(REC,COMMALST(II-1)+1,COMMALST(II)-COMMALST(II-1)-1);
   END;   
   FIELDS(0):=MID(REC,COMMALST(0)+1);//FINAL FIELD OF LINE:LF INCLUDED IF PRESENT
  END; 
  RETURN FIELDS;
 END;

 CSV_Process1(ST,POS1,POSN,IIO,NUMFILES,XXFIELD,RNSCALE)
 // Processes one record: parses it with CSV_Parse1 and, when a valid record
 // number is found in field XXFIELD, stores the field list at CSV_DATA(RN)
 // Optionally shows progress through records
 // − MAY INCLUDE BLANK LINES (RECORDS OF 2 CHARACTERS OR FEWER ARE SKIPPED)
 // ST POS1 POSN: define record (first and last character positions within ST)
 // IIO NUMFILES: used by progress report
 // XXFIELD: Fieldnumber containing PPL list index RN
 // (0=store Consecutively TBD: currently a record is only stored when XXFIELD is non-zero)
 // RNSCALE: Multiplier may convert real to integer for RN (1=Unscaled)
 // EMBEDDED COMMAS ARE NOT PARSED (SEE CSV_Parse1)
 BEGIN 
  LOCAL II;
  LOCAL RN;//NUMERIC RECORD NUMBER 
  
  IF (POSN-1-POS1)>0 THEN
   // PARSE THIS
   FIELDS:=CSV_Parse1(ST,POS1,POSN); 
 
   IF SHOW_RECORDS THEN //DEBUG DISPLAY CONTENTS 
    RECT_P(0,0,320,200);
    FOR II FROM 1 TO SIZE(FIELDS) DO
       TEXTOUT_P((II)+": "+FIELDS(II),0,13*(II));
    END;
   END;
   //WAIT;

   
    IF XXFIELD AND XXFIELD≤SIZE(FIELDS) THEN //EXPECT NUMBERED RECORDS:AND FIELD EXISTS
     //NOTE(review): EXPR EVALUATES TEXT TAKEN FROM THE FILE - NON-NUMERIC CONTENT
     //(EG A HEADER LINE) IS EVALUATED TOO; CONFIRM THIS IS ACCEPTABLE FOR YOUR DATA
     RN:=IP(RNSCALE*EXPR(FIELDS(XXFIELD)));
     IF RN>0 AND RN<10000 THEN //VALID NUMBER: ONLY 1..9999 IS STORED
      IF RN≤SIZE(CSV_DATA) THEN 
       IF CSV_DATA(RN)≠0 THEN
        //MSGBOX("DUPLICATE"+RN);
        //NO ACTION=>REPLACE WITH 2ND
       END;  
      END; 
      //NOTE(review): SHO_PROGRESS IS NOT A PARAMETER HERE, SO THIS APPEARS TO READ
      //THE FILE-LEVEL VARIABLE RATHER THAN THE VALUE PASSED TO CSV_Load - CONFIRM
      IF SHO_PROGRESS AND 1 THEN  
       DRAWMENU("","",FIELDS(XXFIELD),(IIO+"/"+NUMFILES),"","G1CMZ"); //THIS SHOWS RECORD NUMBERS
      END; 
      CSV_DATA(RN):=FIELDS; 
      IF SIZE(FIELDS)>12 AND 0 THEN //DEBUG: DISABLED BY THE "AND 0"
        DRAWMENU("13");
        WAIT;
      END;
      //MSGBOX("DATA"+RN);      
     END;   
    END;      
   //ELSE
      //BLANK LINE : SKIP PARSING THIS LINE
   END;
      //WAIT;
 END;

 EXPORT CSV_Load(FileLST,OFFSET,XXFIELD,RNSCALE,SHO_PROGRESS)
 //FileLST: List Of files all in same format
   //EG "AIRPORTS"+{0,1}+".DAT" or {"A","B"}   
   //Used Because Prime cannot handle 1 big file
 //OFFSET: ADJUST VISIBLE FILE NUMBER
   //EG 0={1,2} (EG "A","B") −1 displays as {0,1} EG AIR0,AIR1
 //XXFIELD:INDEX TO FIELD CONTAINING LIST NUMBER (0=NO NUMBER,CONSECUTIVE,TBD)
   //RECORDS SHORTER ARE NOT STORED
 //RNSCALE:ALLOWS REALS TO BE USED AS INDEX (DEFAULT 1)
 //SHO_PROGRESS
  //0=JUST LOAD (QUICKER)
  //1=SHOW (CALLER CLEARS SCREEN)
 BEGIN
  LOCAL OKC;
  LOCAL II,JJ,TMP,POSN,POS1;
  LOCAL CHSTART,LFPOS;
  LOCAL FNAME,NUMFILES;
  LOCAL CHARCNTLST:={};
  LOCAL LINES:=1;

  CSV_IDS();
 
  IF SIZE(FileLST)==0 THEN
  // MAYBE ASK GUI, OR DO NOTHING
  END;
  NUMFILES:=SIZE(FileLST);
  IF NUMFILES>0 THEN
   //TEXTOUT_P("Loading "+(SIZE(FileLST))+" files",0,0);
   //QUICKLY ACCESS ALL FILES AND DISCOVER CHAR SIZE
   //THIS ENABLES QUICK ERROR IF A FILE IS MISSING
   //THE CHARACTER COUNT IS UNUSED (POSSIBLE PROGRESS USE)
   FOR II FROM 1 TO NUMFILES DO
    ST:=FileLST(II);
    //TMP:=Notes(ST);
    //CHARCNTLST(II):=SIZE(TMP);
    ////TEXTOUT_P(ST+" "+SIZE(TMP)+" chars",0,II*YL);
   END; 

   CSV_DATA:={};

   FOR II FROM 1 TO NUMFILES DO
    ST:=FileLST(II);
    CSV_SHO_PROGRESS_FILES(SHO_PROGRESS,II+OFFSET,NUMFILES,ST);
    TMP:=Notes(ST);
    CHARCNTLST(II):=DIM(TMP);
   
    POSN:=0;
    POS1:=1;
    
    REPEAT
      REPEAT 
       POSN:=POSN+1;
      UNTIL MID(TMP,POSN,1)==LF OR POSN==DIM(TMP);
      //MSGBOX(ASC(MID(TMP,POSN,1)));
      CSV_Process1(TMP,POS1,POSN,II+OFFSET,NUMFILES,XXFIELD,1);

      POS1:=POSN+1;
     UNTIL POSN==DIM(TMP);// 
     //BETWEEN FILE  
   END;//II 
   IF SHO_PROGRESS THEN
    // (PROBABLY UNSEEN )
    // POSSIBLY SHOW 100/100 IF UNCLEARED
    DRAWMENU("","",LINES,NUMFILES+"/"+NUMFILES);//10/10
   //USER IS BETTER ABLE TO VALIDATE SO LET HIM SHOW LSTSIZE/MAX
   //BUT COULD SHOW NUM OF MATCHES
   END;
  END;//IF

  //RESTORE USER SCREEN 
  //UNLESS NEED TO SHOW RESULTS
  //CSV_SCR_GET();
  RETURN CSV_DATA;
 END;

 CSV_TEST()
 BEGIN
  LOCAL XXNUM;
  LOCAL FLIST2:="AIR"+{"0","1","2","3","4","5","6","7","8","9"};
  LOCAL FLIST1:={"G7"};
  LOCAL TM;

  PRINT();
  RECT();//CLEAR SREEN
  PRINT("TEST");//EXAMPLE USER SCREEN
  XXNUM:=1;
  CSV_SCR_PUT();//SAVE USER SCREEN
  //RECT_P() HERE IF WE WISH TO DISCARD USER SCREEN
  TM:=TICKS; 
  //CSV_Load(FLIST1,−1,XXNUM,1,SHO_PROGRESS);
  CSV_DATA:=CSV_Load(FLIST2,−1,XXNUM,1,SHO_PROGRESS);
  CSV_SCR_GET();//RESTORE USER SREEN IF DESIRED
  TEXTOUT_P("Elapsed "+((TICKS-TM)/1000)+" s",0,200,2);//FOR ACCURATE TIMING TAKE TICKS BEFORE SCR_GET
 END;

 EXPORT CSV()
 //PROGRAM ENTRY POINT: RUNS CSV_TEST, COPIES THE LOADED RECORDS INTO L1, THEN PAUSES
 BEGIN
  //PRINT();
  RECT();
  CSV_TEST();
  L1:=CSV_DATA;//MAKE THE RESULT AVAILABLE IN THE GLOBAL LIST L1
  WAIT;
 END;

As an example of how to use this, see my program GEODATA.

Stephen Lewkowicz (G1CMZ)
https://my.numworks.com/python/steveg1cmz
Visit this user's website Find all posts by this user
Quote this message in a reply
Post Reply 




User(s) browsing this thread: 2 Guest(s)