Post Reply 
[newRPL] Filesystem related date/time issues
08-14-2016, 11:06 PM
Post: #1
[newRPL] Filesystem related date/time issues
Hi,

Having found some time to inspect the date/time related code in newRPL's FAT implementation, I added some comments (either helping me to understand and validate the code, or pointing at potential issues to be sorted out). The following snippets also illustrate some fixes and sanity checks I recommend to be included.

In order to keep the general newRPL thread readable, I'm opening a new thread for the topic...

https://sourceforge.net/p/newrpl/sources..._50g/rtc.c
Code:

int __getRTCvalue(int offset)
{
   // FIX 2016-08-12 MPL: volatile was missing
   unsigned char a = *((volatile unsigned char *)RTC_REGS + offset); // little endian

   return ((a&0xF0) >> 1) + ((a&0xF0) >> 3) + (a&0x0F); // convert BCD to binary
}

#define __getRTCSec()  __getRTCvalue(0x70) // 0..59
#define __getRTCMin()  __getRTCvalue(0x74) // 0..59
#define __getRTCHour() __getRTCvalue(0x78) // 0..23

#define __getRTCDay()  __getRTCvalue(0x7C) // 1..31
#define __getRTCMon()  __getRTCvalue(0x84) // 1..12
#define __getRTCYear() __getRTCvalue(0x88) // 00..98 (2000..2098), 99 (1999)

int rtc_getday()
{
   // FIX 2016-08-12 MPL: removed bcd definition, added missing return value.
   return __getRTCDay(); // 1-based
}

int rtc_getmon()
{
   return __getRTCMon(); // 1-based
}

int rtc_getyear()
{
   // DBG 2016-08-12 MPL: check RTC leap year & calendar rollover behaviour (99 is apparently defined as for 1999, not 2099 -- to be sorted out)
   return __getRTCYear() + 2000; // 2000-based
}

int rtc_getsec()
{
   return __getRTCSec(); // 0-based
}

int rtc_getmin()
{
   return __getRTCMin(); // 0-based
}

int rtc_gethour()
{
   return __getRTCHour(); // 0-based
}
https://sourceforge.net/p/newrpl/sources.../hal_api.h
Code:

struct compact_tm {
   unsigned tm_sec:6, // seconds after the minute 0-60*
             tm_min:6, // minutes after the hour 0-59
             tm_hour:5, // hours since midnight 0-23
             tm_mday:5, // day of the month 1-31 (0=day not set)
             // DBG 2016-08-12 MPL: Is there a particular reason why the
             // month entry is defined as a 0-based value, while the FAT dir
             // entry and the RTC register are 1-based? The FAT dir entry
             // allows for special value of 0 for "undefined", something we
             // cannot fully cope with at present
             tm_mon:4, // months since January 0-11
             tm_wday:3, // days since Sunday 0-6
             tm_isdst:1;
   unsigned int tm_year; // int years since 1900 (up to 2017 in FAT)
};
https://sourceforge.net/p/newrpl/sources...datetime.c
Code:

void FSGetDateTime(unsigned int *datetime, unsigned int *hundredths)
{
   unsigned int min, sec, hour, day, mon, year, flag = 0;

   do {
      year = rtc_getyear(); // Descending order is important here
      mon = rtc_getmon();
      day = rtc_getday();
      hour = rtc_gethour();
      min = rtc_getmin();
      sec = rtc_getsec();
      if (!sec) // if seconds rollover occured, loop around to reread all values 
         flag = ~flag; 
      // DBG 2016-08-12 MPL: Is one loop cycle enough for rollover
      // to be reliably propagated in RTC hardware? I've seen other code actually
      // looping until seconds become > 0 (which can cause delays up to 1 sec, though).
   } while (flag);
 
   year -= 1980; // fix from 1980 to 2079
   
   *datetime = (sec>>1) | (min<<5) | (hour<<11) | (day<<16) | (mon<<21) | (year<<25);            
   *hundredths = (sec&1) ? 100 : 0;

   return;
}
https://sourceforge.net/p/newrpl/sources...ritetime.c
Code:

// CHG 2016-08-13 MPL: Combined FSGetWriteTime(), FSGetCreatTime(), FSGetAccessDate() to avoid redundancy and save code

void FSGetDirStamp(FS_FILE *file, struct compact_tm *dt, unsigned char type_stamp)
{
   unsigned int a, b;

   // DBG 2016-08-13 MPL: could be much further optimized by
   // changing organization of FS_FILE struct and "dirty" coding
   if (type_stamp == 0) // access
      b = a = file->LastAccDate; // points to 16-bit value only
   else {
      if (type_stamp == 1) // modify
         a = file->WriteTimeDate;
      else // create
         a = file->CreatTimeDate;

      b = (a << 1) & 0x1F;
      // FIX 2016-08-12 MPL: ignore if greater than 29, no (valid) time record
      dt->tm_sec = (b <= 29) ? b : 0;
   
      b = (a >> 5) & 0x3F;
      // FIX 2016-08-12 MPL: ignore if greater than 59, no (valid) time record
      dt->tm_min = (b <= 59) ? b : 0;

      b = (a >> (5+6)) & 0x1F;
      // FIX 2016-08-12 MPL: ignore if greater than 23, no (valid) time record
      dt->tm_hour = (b <= 23) ? b : 0;

      b = a >> (5+6+5);
   }

   b &= 0x1F;
   // FIX 2016-08-12 MPL: set to 0 if greater than 31, indicate invalid day
   dt->tm_day = (b <= 31) ? b : 0; // value 0 = RTC (day) not set

   if (type_stamp) // modify and create
      b = a >> (5+6+5+5);
   else // access
      b = a >> 5;
   b &= 0x0F;
   // FIX 2016-08-12 MPL: set to 0 if greater than 12, indicate invalid month
   // DBG 2016-08-12 MPL: avoid underflow, but value 0 not properly dealt
   // with yet, alternatively we could set dt->tm_day to 0 to invalidate the
   // date as a whole
   dt->tm_mon = (b && (b <= 12)) ? b-1 : 0; // (value 0 = RTC (month) not set)

   if (type_stamp) // modify and create
      b = a >> (5+6+5+5+4);
   else { // access
      b = a >> (5+4);
      b &= 0x7F; // 0..127 for 1980..2107
   }
   dt->tm_year = b + 80; // count years from 1900 instead of 1980

   // DBG 2016-08-12 MPL: To be calculated from date
   dt->tm_wday = 0;
   // dt->tm_yday = 0;

   dt->tm_isdst = 0;

   return;
}

void FSGetWriteTime(FS_FILE *file, struct compact_tm *dt)
{
   FSGetDirStamp(file, dt, 1);

   return;
}
https://sourceforge.net/p/newrpl/sources...reattime.c
Code:

void FSGetCreatTime(FS_FILE *file, struct compact_tm *dt)
{
   FSGetDirStamp(file, dt, 2);

   dt->tm_sec += file->CrtTmTenth/100;

   return;
}
https://sourceforge.net/p/newrpl/sources...cessdate.c
Code:

void FSGetAccessDate(FS_FILE *file, struct compact_tm *dt)
{
   FSGetDirStamp(file, dt, 0);

   dt->tm_sec = 0;
   dt->tm_min = 0;
   dt->tm_hour = 0;

   return;
}
Hope it helps,

Matthias


--
"Programs are poems for computers."
Find all posts by this user
Quote this message in a reply
08-14-2016, 11:59 PM
Post: #2
RE: [newRPL] Filesystem related date/time issues
It seems like sourceforge has something that is called Merge Requests. I assume that is something similar to Pull Requests of github. Maybe it would be a good idea to use such feature when you collaborate on source code?
Find all posts by this user
Quote this message in a reply
08-15-2016, 02:44 AM (This post was last modified: 08-15-2016 02:47 AM by Claudio L..)
Post: #3
RE: [newRPL] Filesystem related date/time issues
(08-14-2016 11:06 PM)matthiaspaul Wrote:  Hi,

Having found some time to inspect the date/time related code in newRPL's FAT implementation, I added some comments (either helping me to understand and validate the code, or pointing at potential issues to be sorted out). The following snippets also illustrate some fixes and sanity checks I recommend to be included.

In order to keep the general newRPL thread readable, I'm opening a new thread for the topic...
Good idea.

(08-14-2016 11:06 PM)matthiaspaul Wrote:  https://sourceforge.net/p/newrpl/sources..._50g/rtc.c

Thanks for the bogus bcd line, don't even know why it was there. The year 2k problem is a non-issue on the calc, it was sorted out in a chip revision long before the 50g came out in 2006 (EDIT: should read: long before the 49G+ came out in 2003). The RTC assumes the 2 digits are 20xx, unless I misunderstood the Samsung docs.

(08-14-2016 11:06 PM)matthiaspaul Wrote:  
Code:

             tm_mday:5, // day of the month 1-31 (0=day not set)
             // DBG 2016-08-12 MPL: Is there a particular reason why the
             // month entry is defined as a 0-based value, while the FAT dir
             // entry and the RTC register are 1-based? The FAT dir entry
             // allows for special value of 0 for "undefined", something we
             // cannot fully cope with at present
The only reason is POSIX:
http://pubs.opengroup.org/onlinepubs/969...ime.h.html
I just took the struct tm and made it more compact.

(08-14-2016 11:06 PM)matthiaspaul Wrote:  https://sourceforge.net/p/newrpl/sources...datetime.c
Code:

      // DBG 2016-08-12 MPL: Is one loop cycle enough for rollover
      // to be reliably propagated in RTC hardware? I've seen other code actually
      // looping until seconds become > 0 (which can cause delays up to 1 sec, though).

I don't see why more than one loop would be needed. The Samsung docs don't imply anywhere that more than one loop could be needed, this is the only reference to the issue:
Quote:...there is no problem, but, if the value is 0 sec., the year, month, date, hour, and minute may be changed to 2060 (Year), 1 (Month), 1 (Date), 0 (Hour) and 0 (Minute) because of the one second deviation that was mentioned. In this case, the user should re-read from BCDYEAR to BCDSEC if BCDSEC is zero.

(08-14-2016 11:06 PM)matthiaspaul Wrote:  https://sourceforge.net/p/newrpl/sources...ritetime.c
Code:

// CHG 2016-08-13 MPL: Combined FSGetWriteTime(), FSGetCreatTime(), FSGetAccessDate() to avoid redundancy and save code
These are very simple functions that the compiler usually auto-inlines into the calling function. Combining them might prevent auto-inlining from working.
Regarding the sanitization of date/time: the file system does nothing with these values, so invalid dates don't disturb its operation. I think the user code that uses these dates/times is better positioned to decide what to do with an invalid date than the file system layer (which would just mark as invalid and discard everything), so it's better not to touch them.
For example, a program might be looking for all files on a certain year, then doesn't care if the month is invalid as long as the year is good. Other program could be sorting files by date, and needs all fields to be valid. That same program could prefer to list a month 13 after the other files that year (and month 14 would go after 13 and before 15), while if we force it to zero to indicate invalid month, it would have to list it first (and there's no difference between month 13, 14 or 15).
Another advantage of not sanitizing is that the conversion is reversible: if you read an invalid creation date writing it back with the same tm struct would recover the same invalid date, preserving whatever was on disk.
Find all posts by this user
Quote this message in a reply
08-15-2016, 12:13 PM (This post was last modified: 08-15-2016 04:49 PM by matthiaspaul.)
Post: #4
RE: [newRPL] Filesystem related date/time issues
Hi Claudio.
(08-15-2016 02:44 AM)Claudio L. Wrote:  The year 2k problem is a non-issue on the calc, it was sorted out in a chip revision long before the 50g came out in 2006 (EDIT: should read: long before the 49G+ came out in 2003). The RTC assumes the 2 digits are 20xx, unless I misunderstood the Samsung docs.
Strangely enough, however, I saw some code for this chip (family) where 99 was special-cased for 1999 (but perhaps that was for an older chip revision, I don't know). It wouldn't matter much if the chip wouldn't be responsible for the calculation of the length of February and (probably also) the day-of-week. I didn't read this up in the reference manuals yet...

(08-15-2016 02:44 AM)Claudio L. Wrote:  
(08-14-2016 11:06 PM)matthiaspaul Wrote:  
Code:

             tm_mday:5, // day of the month 1-31 (0=day not set)
             // DBG 2016-08-12 MPL: Is there a particular reason why the
             // month entry is defined as a 0-based value, while the FAT dir
             // entry and the RTC register are 1-based? The FAT dir entry
             // allows for special value of 0 for "undefined", something we
             // cannot fully cope with at present
The only reason is POSIX:
http://pubs.opengroup.org/onlinepubs/969...ime.h.html
I just took the struct tm and made it more compact.
I see. There were two reasons for that comment. I saw special -1 adjustments for it in three places (in the original unfolded code, that is), which could have been replaced by possibly fewer +1 adjustments on the other end. More important, for both entries, day and month, the FAT filesystem actually defines a value of 0 as valid to indicate "not set". Most code examples I have seen would display this with the date or month set to 0 (most often as "1980-00-00"), while others use it to suppress the display/usage of that value. The hardwired -1 in your code makes the month value underflow under that condition, which basically lumps this special case together with invalid month values beyond December. It very much depends on the higher level code if it would cope with this correctly or not.

(08-15-2016 02:44 AM)Claudio L. Wrote:  
(08-14-2016 11:06 PM)matthiaspaul Wrote:  https://sourceforge.net/p/newrpl/sources...datetime.c
Code:

      // DBG 2016-08-12 MPL: Is one loop cycle enough for rollover
      // to be reliably propagated in RTC hardware? I've seen other code actually
      // looping until seconds become > 0 (which can cause delays up to 1 sec, though).
I don't see why more than one loop would be needed. The Samsung docs don't imply anywhere that more than one loop could be needed, this is the only reference to the issue:
Quote:...there is no problem, but, if the value is 0 sec., the year, month, date, hour, and minute may be changed to 2060 (Year), 1 (Month), 1 (Date), 0 (Hour) and 0 (Minute) because of the one second deviation that was mentioned. In this case, the user should re-read from BCDYEAR to BCDSEC if BCDSEC is zero.
So far, I have zero experience with this Samsung chip, so I don't know either.

I don't know if the Samsung chip ever had this issue (probably not), but interpreting Samsung's advise above very strictly, it could well mean that the whole bunch of regs should be reread *until* BCDSEC is not (no longer, that is) zero. On the other hand, their explanation regarding that "one second deviation that was mentioned" and the "there is no problem" indicate no such issue.

It's well possible (probably!) that the Samsung RTC registers are latched internally so that this is really a non-issue. However, in the past I have seen more than one RTC where the registers were not latched and the rollover took some time to be propagated internally - with such chips chances were that you still caught the old transient (year etc.) values on subsequent reads for some while and had to wait for the actual change to happen (complicated code) or just waited some arbitrary, but long enough time.

(Being somewhat shocked I saw code for that very Samsung chip doing just this, waiting for up to a whole second reading out the RTC. I don't have the link handy any more, though. I have also seen examples performing only one loop at max, though, so that former example might have been down to either "careless coding" or "working around some issue", I don't know. That's why I brought this up to our attention here.)

(08-15-2016 02:44 AM)Claudio L. Wrote:  
(08-14-2016 11:06 PM)matthiaspaul Wrote:  https://sourceforge.net/p/newrpl/sources...ritetime.c
Code:

// CHG 2016-08-13 MPL: Combined FSGetWriteTime(), FSGetCreatTime(), FSGetAccessDate() to avoid redundancy and save code
These are very simple functions that the compiler usually auto-inlines into the calling function. Combining them might prevent auto-inlining from working.
Actually, I hope for the compiler to do that (well, folding code rather than inlining - speed is not really an issue here, size is) otherwise this coding style would be quite "expensive" on an embedded system.

(My experience with the not so stellar optimization performance of GCC-based compilers stems from other platforms, though.)

I was tempted to introduce some dirty pointer indexing two bytes in front of the 16-bit lastaccess entry (and later just throw away/overwrite the bogus time value) building on the symmetry with the cases for the two 32-bit entries. That would remove all those special cases and considerably shrink the code - probably quite beyond to what the compiler could do. However, the easiest way to achieve this would be by some rearrangements in the FS_FILE struct so that negative indexing could be avoided, but I didn't want to touch that at this time.

Speaking about compiler optimization, without that added "volatile" there was some risk that a highly-optimizing compiler would have thrown out the RTC readout loop. After all, the compiler has no indication that the read values could ever change, so they only had to be read once (before the loop), and given the fact that flag is not used elsewhere in the code, the remaining loop reading out seconds twice could have been treated as just a short delay without external sideeffects, which then could be replaced by a single read of seconds as well. That "volatile" makes sure that the rollover case is actually treated specially regardless of the compiler's performance and configuration.

(08-15-2016 02:44 AM)Claudio L. Wrote:  Regarding the sanitization of date/time: the file system does nothing with these values, so invalid dates don't disturb its operation. I think the user code that uses these dates/times is better positioned to decide what to do with an invalid date than the file system layer (which would just mark as invalid and discard everything), so it's better not to touch them.
For example, a program might be looking for all files on a certain year, then doesn't care if the month is invalid as long as the year is good. Other program could be sorting files by date, and needs all fields to be valid. That same program could prefer to list a month 13 after the other files that year (and month 14 would go after 13 and before 15), while if we force it to zero to indicate invalid month, it would have to list it first (and there's no difference between month 13, 14 or 15).
That's right, and could be adjusted to use another special value to indicate "invalid". The reason why I proposed to use 0 is because that's what is actually used on disk by the FAT filesystem to indicate "month not set" or "day not set" (see the other comment above).

I don't think it would be a good idea to propagate invalid values (except for one "neutral" value) up to the frontend (RPL), as users are expecting "mathematical correctness" here (perhaps even doing some calendar calculations on it and just relying on that the values are either well-formatted or clearly indicated as invalid). They probably don't expect date functions to return impossibly high days and months (even if that would happen only in case the filesystem stores invalid entries - ensuring that it hasn't, is, I think, outside a user's responsibility and scope).

Being able to search for "month 13 or 14" might make sense on a lower level - for a lower level API function, perhaps. However, I'm not sure if that would still hold true on a calculator. Are you expecting people to write CHKDSK-like tools to be used on the calc?

(Sidenote: Except for the two special values of 0 for months and days, the only invalid values I have ever encountered in practise on FAT volumes were hour = 24, minute = 60 and second/2 = 30 as well as invalid days in February (but still not larger than 31.
However, given the space constraints in directory entries, I was, at the expense of disturbing the date info on unaware systems, experimenting with how to possibly use these invalidly high values to detect and encode additional file-related information in an advanced but mostly backward compatible FAT32 variant as (free) exFAT alternative (actually a framework of multiple optional features/properties) in a way that keeps implementation overhead at a minimum even on "legacy" and in embedded systems. That certainly does not need to be supported in newRPL, but that's why I don't assume any meaningful date or time related information in invalidly high date and time values.)
Quote:Another advantage of not sanitizing is that the conversion is reversible: if you read an invalid creation date writing it back with the same tm struct would recover the same invalid date, preserving whatever was on disk.
That's a very good point.

I was living under the impression, though, that routines such as fsupdatedirentry() (discussed in the original thread) would deal with the raw 16-bit and 32-bit values as stored in the FS_FILE struct, not with the somewhat "higher level" tm struct as used by FSGetWriteTime(), FSGetCreatTime(), FSGetAccessDate(). Either way, regarding easier reversibility, then perhaps the -1 month alignment for POSIX should be abandoned, as you would have to revert that as well otherwise. This would also free the value 0 for "not set". Does some already existing high-level code depend on that POSIX alignment?

Greetings,

Matthias


--
"Programs are poems for computers."
Find all posts by this user
Quote this message in a reply
08-15-2016, 02:51 PM
Post: #5
RE: [newRPL] Filesystem related date/time issues
(08-15-2016 12:13 PM)matthiaspaul Wrote:  Strangely enough, however, I saw some code for this chip (family) where 99 was special-cased for 1999 (but perhaps that was for an older chip revision, I don't know). It wouldn't matter much if the chip wouldn't be responsible for the calculation of the length of February and (probably also) the day-of-week. I didn't read this up in the reference manuals yet...
Because the chip uses only 2 digits, originally those digits could mean 00 = 1900 or 00 = 2000, at the user's choice. But not quite, because the original chip lacked the special 400-year leap year that happened in 2000. This is not a problem for us in 2016 obviously, it was only a problem on Feb 28, 2000 near midnight :-).
Later revisions of the chip implemented the leap year, but hardwired to be applied always. This means 00 = 2000, and user can no longer assume that 00 = 1900.
Other than that, this RTC just works fine, I assumed 00-99 = 2000-2099.

(08-15-2016 12:13 PM)matthiaspaul Wrote:  
(08-15-2016 02:44 AM)Claudio L. Wrote:  The only reason is POSIX:
http://pubs.opengroup.org/onlinepubs/969...ime.h.html
I just took the struct tm and made it more compact.
I see. There were two reasons for that comment. I saw special -1 adjustments for it in three places (in the original unfolded code, that is), which could have been replaced by possibly fewer +1 adjustments on the other end. More important, for both entries, day and month, the FAT filesystem actually defines a value of 0 as valid to indicate "not set". Most code examples I have seen would display this with the date or month set to 0 (most often as "1980-00-00"), while others use it to suppress the display/usage of that value. The hardwired -1 in your code makes the month value underflow under that condition, which basically lumps this special case together with invalid month values beyond December. It very much depends on the higher level code if it would cope with this correctly or not.
Somebody could "split" this file system into a separate library and use it outside newRPL. In that case, they would expect the tm structure values returned to be in line with the POSIX standard struct tm, so I'm inclined to keep it that way. The 0 value to indicate not set I think it affects the whole 16-bit number: if the half-word is zero, then the date is not set, I don't think it was meant to be interpreted for individual fields. I can easily add support for that returning an error on those functions if the date is zero/invalid.

(08-15-2016 12:13 PM)matthiaspaul Wrote:  So far, I have zero experience with this Samsung chip, so I don't know either.

I don't know if the Samsung chip ever had this issue (probably not), but interpreting Samsung's advise above very strictly, it could well mean that the whole bunch of regs should be reread *until* BCDSEC is not (no longer, that is) zero. On the other hand, their explanation regarding that "one second deviation that was mentioned" and the "there is no problem" indicate no such issue.

It's well possible (probably!) that the Samsung RTC registers are latched internally so that this is really a non-issue. However, in the past I have seen more than one RTC where the registers were not latched and the rollover took some time to be propagated internally - with such chips chances were that you still caught the old transient (year etc.) values on subsequent reads for some while and had to wait for the actual change to happen (complicated code) or just waited some arbitrary, but long enough time.

(Being somewhat shocked I saw code for that very Samsung chip doing just this, waiting for up to a whole second reading out the RTC. I don't have the link handy any more, though. I have also seen examples performing only one loop at max, though, so that former example might have been down to either "careless coding" or "working around some issue", I don't know. That's why I brought this up to our attention here.)
Don't be so shocked, I've seen weird stuff out there. I didn't actually test it but the way I interpreted the docs only one read is needed, all internal registers are updated at once and have no reason to suspect otherwise. It wouldn't be hard to test it though.

(08-15-2016 12:13 PM)matthiaspaul Wrote:  Actually, I hope for the compiler to do that (well, folding code rather than inlining - speed is not really an issue here, size is) otherwise this coding style would be quite "expensive" on an embedded system.
I just checked the binaries, the compiler did not inline them.
You got me curious now. I'll count how many words these 3 functions take, then change them with your code and recompile, then count how many words it takes. My guess is by the time you add the tests/branching, additional argument, etc. it will be relatively close.

(08-15-2016 12:13 PM)matthiaspaul Wrote:  I was tempted to introduce some dirty pointer indexing two bytes in front of the 16-bit lastaccess entry (and later just throw away/overwrite the bogus time value) building on the symmetry with the cases for the two 32-bit entries. That would remove all those special cases and considerably shrink the code - probably quite beyond to what the compiler could do. However, the easiest way to achieve this would be by some rearrangements in the FS_FILE struct so that negative indexing could be avoided, but I didn't want to touch that at this time.
I'm not sure it's worth the trouble. You'll save a few words but the code will be harder to follow for others. At this stage I don't want to obsess too much about code compactness or even speed. I'm more into portability and easy to read for others to help me (like you are doing right now). The main goal is: code correctness, proper error management, and general bug hunting (like that volatile you pointed out above).
Once newRPL 1.0 is released, I'll start aggressive optimizations all over the place (decimal math routines could use some assembler tricks, transcendental functions are particularly slow and in need of serious optimization).
But that's further down the road, if the code is clean enough it will be easier to optimize.

(08-15-2016 12:13 PM)matthiaspaul Wrote:  Speaking about compiler optimization, without that added "volatile" there was some risk that a highly-optimizing compiler would have thrown out the RTC readout loop.
Yes, that was a bug, it should've been volatile, thanks.


(08-15-2016 12:13 PM)matthiaspaul Wrote:  I don't think it would be a good idea to propagate invalid values (except for one "neutral" value) up to the frontend (RPL), as users are expecting "mathematical correctness" here (perhaps even doing some calendar calculations on it and just relying on that the values are either well-formatted or clearly indicated as invalid). They probably don't expect date functions to return impossibly high days and months (even if that would happen only in case the filesystem stores invalid entries - ensuring that it hasn't, is, I think, outside the a user's responsibility and scope).
Being able to search for "month 13 or 14" might make sense on a lower level - for a lower level API function, perhaps. However, I'm not sure if that would still hold true on a calculator. Are you expecting people to write CHKDSK-like tools to be used on the calc?
I didn't mean to pass it on to the user, perhaps wasn't clear. lib74 commands would have to filter that information properly before passing to the user. But if I build a filer, it could internally sort using the illegal values with no problem (getting the values directly from the file system, not from the user-facing RPL commands).


(08-15-2016 12:13 PM)matthiaspaul Wrote:  I was living under the impression, though, that routines such as fsupdatedirentry() (discussed in the original thread) would deal with the raw 16-bit and 32-bit values as stored in the FS_FILE struct, not with the somewhat "higher level" tm struct as used by FSGetWriteTime(), FSGetCreatTime(), FSGetAccessDate(). Either way, regarding easier reversibility, then perhaps the -1 month alignment for POSIX should be abandoned, as you would have to revert that as well otherwise. This would also free the value 0 for "not set". Does some already existing high-level code depend on that POSIX alignment?
You are correct, the file system internally does not use the tm struct at all, and does not use or manipulate the dates in any way.
The tm struct is only for the external API.
So far nothing in newRPL *needs* the POSIX conformance, it just makes it easy to reuse the file system module in other projects too Smile
I can find other ways to pass an "invalid date" error, more in line with the rest of the library's error reporting.
Find all posts by this user
Quote this message in a reply
09-11-2016, 10:51 PM (This post was last modified: 09-19-2016 07:16 PM by matthiaspaul.)
Post: #6
RE: [newRPL] Filesystem related date/time issues
The FAT file system supports dates from 1980-01-01 to 2107-12-31 (although not all implementations support dates after the 2099-12-31).

As discussed above, the newRPL code currently maps Samsung RTC register values 00 to 99 to years 2000 to 2099.

Since we have 2016 already, RTC year values lower than 16 should not normally occur in conjunction with any newRPL version supporting SD cards. If we take into consideration the possibility to backport the newRPL code so it could be used also in HPGCC-based libraries with the default HP 49g+/50g firmware, we might want to support a few earlier RTC year values as well, but hardly before ca. 2008 when the FAT library for HPGCC was originally developed.

I would therefore like to suggest to expand the range of supported years by eight more years to the full range supported by the FAT filesystem simply by mapping RTC year values 00 to 07 to years 2100 to 2107, while continuing to map 08 to 99 to years 2008 to 2099 as before. This would not increase the code by more than a couple of bytes.

Of course, the RTC hardware would continue to associate year values 00 to 07 with 2000 to 2007 rather than 2100 to 2107 internally, so the calculation of the day-of-week would become incorrect starting 2100. Fortunately, newRPL does not make use of the hardware day-of-week, so this does not impose a problem at all. Would newRPL need the day-of-week in the future, it would have to implement a day-of-week algorithm in software, anyway, since the hardware does not support dates below 2000 whilst the FAT file system does (and date stamps with years 1980 to 1999 are not unlikely to occur on media, so they can't be ignored).
The RTC hardware would also miscalculate leap years. However, for the span 2100 to 2107 there is a single date, where the calculation would effectively differ: The RTC hardware would assume 2100 to be a leap year and therefore 2100-02-28 to be followed by 2100-02-29, while in reality 2100 is no leap year and the 2100-02-28 will be followed by the 2100-03-01. The other leap year in the range, 2104, would be handled correctly, even if the RTC hardware would assume this to be 2004 internally.

So, the only drawback expanding the supported year range up to 2107 would be that users would have to manually increase the date by one day on or after the 2100-03-01 ("2100-02-29") a single time, and only when using the physical 49g+/50g calculator. The problem would not exist in the emulator. (It would be even possible to add code to silently correct the date, but I don't know if it is worth to add perhaps 100 bytes of code to cope with an event occuring only once. A note in the manual might do as well.)

https://sourceforge.net/p/newrpl/sources..._50g/rtc.c
Code:

int rtc_getyear()
{
   // CHG 2016-09-12 MPL: Max out FAT FS year range to 2107.
   int year;

   year = __getRTCYear() + 2000; // 2000-based
   if (year <= 2007) // RTC values 00..07 mapped to 2100..2107 (max FAT FS year)
      year += 100;   // Caveat: HW treats 2100 as leap year, so user will have
                     // to advance RTC by one day on/after 2100-03-01 once.

   return year;
}

https://sourceforge.net/p/newrpl/sources...datetime.c
With the above range expansion in place, this comment should be changed to:
Code:

year -= 1980;        // FAT epoch is 1980. Map 2008..2107 to 28..127
Without that expansion applied, the comment still needs to be changed as follows:
Code:

year -= 1980;        // FAT epoch is 1980. Map 2000..2099 to 20..119

Another case that would need checking: Does the RTC roll over from year 99 to 00, or does it stay at 99? If it does not roll over, users would have to manually set the date on or after the 2100-01-01 as well.

Greetings,

Matthias


--
"Programs are poems for computers."
Find all posts by this user
Quote this message in a reply
09-12-2016, 01:38 PM
Post: #7
RE: [newRPL] Filesystem related date/time issues
(09-11-2016 10:51 PM)matthiaspaul Wrote:  ...
So, the only drawback expanding the supported year range up to 2107 would be that users would have to manually increase the date by one day on or after the 2100-03-01 ("2100-02-29") a single time, and only when using the physical 49g+/50g calculator. The problem would not exist in the emulator. (It would be even possible to add code to silently correct the date, but I don't know if it is worth to add perhaps 100 bytes of code to cope with an event occuring only once. A note in the manual might do as well.)
The only question I have is: why bother shifting the scale by 7 years? HP calcs last long, but I don't think they will be around in 2100. I don't expect people to use a FAT filesystem in 2100, they shouldn't be even coding in 2100 (using neural interfaces of some sort to get machines to do what they want).

(09-11-2016 10:51 PM)matthiaspaul Wrote:  Another case that would need checking: Does the RTC roll over from year 99 to 00, or does it stay at 99? If it does not roll over, users would have to manually set the date on or after the 2100-01-01 as well.

It was supposed to roll over from 1999 to 2000, there's no reason to suspect it won't, but in any case, this hardware won't be running in 84 years so it's not a problem I need to address now. I'll leave it for my grand-grand-children to fix if they are interested.
Find all posts by this user
Quote this message in a reply
09-17-2016, 11:49 PM
Post: #8
RE: [newRPL] Filesystem related date/time issues
Hi Claudio.
(08-14-2016 11:06 PM)matthiaspaul Wrote:  https://sourceforge.net/p/newrpl/sources..._50g/rtc.c
Code:

int rtc_getday()
{
   // FIX 2016-08-12 MPL: removed bcd definition, added missing return value.
   return __getRTCDay(); // 1-based
}
(08-15-2016 02:44 AM)Claudio L. Wrote:  Thanks for the bogus bcd line, don't even know why it was there. The year 2k problem is a non-issue on the calc, it was sorted out in a chip revision long before the 50g came out in 2006 (EDIT: should read: long before the 49G+ came out in 2003). The RTC assumes the 2 digits are 20xx, unless I misunderstood the Samsung docs.
Something must have gone wrong here, as the code at SourceForge still shows
Code:

int rtc_getday()
{
    int bcd=__getRTCDay();
}
after the September update.

Greetings,

Matthias


--
"Programs are poems for computers."
Find all posts by this user
Quote this message in a reply
09-18-2016, 03:36 AM
Post: #9
RE: [newRPL] Filesystem related date/time issues
(09-17-2016 11:49 PM)matthiaspaul Wrote:  Something must have gone wrong here, as the code at SourceForge still shows
Code:

int rtc_getday()
{
    int bcd=__getRTCDay();
}
after the September update.

I fixed that the same day you posted it, I remember even committing that change. Perhaps I forgot to push it to the repos! I was using a VM with FreeBSD and an update left it unusable sometime last month, so I ended up installing Ubuntu Mate from scratch and cloning the repos again. If I forgot to push before it went bad, I wonder what other fixes got lost... that's very bad news.
Find all posts by this user
Quote this message in a reply
09-21-2016, 07:43 PM
Post: #10
RE: [newRPL] Filesystem related date/time issues
After the September update, these three files
https://sourceforge.net/p/newrpl/sources...ritetime.c
https://sourceforge.net/p/newrpl/sources...reattime.c
https://sourceforge.net/p/newrpl/sources...cessdate.c
contain the line
Code:

  dt->tm_year = 0;
to return a default year of 1900 when the corresponding date entry in the FAT file system is all 0 (which is a special case for "undefined"). (As discussed further above, it is technically possible to independently set the month and/or the day to "undefined", but not the year, but it is probably okay to not distinguish between these cases.)

While it is not "wrong" to set this to 1900 (because, basically, it stands for "undefined"), in the context of the FAT filesystem this special date is normally
set to 1980 by convention. It might confuse users, if FAT dates given as
1980-00-00 under DOS and Windows are displayed as 1900-00-00
in newRPL. People might even search for such dates in order to identify files modified while the (PC) RTC wasn't set.

So, unless there are strong reasons to map this to 1900, I would suggest to map it to 1980 instead.
Code:

  dt->tm_year = 80;

Greetings,

Matthias


--
"Programs are poems for computers."
Find all posts by this user
Quote this message in a reply
09-22-2016, 08:13 PM (This post was last modified: 09-22-2016 08:14 PM by Claudio L..)
Post: #11
RE: [newRPL] Filesystem related date/time issues
(09-21-2016 07:43 PM)matthiaspaul Wrote:  After the September update, these three files
https://sourceforge.net/p/newrpl/sources...ritetime.c
https://sourceforge.net/p/newrpl/sources...reattime.c
https://sourceforge.net/p/newrpl/sources...cessdate.c
contain the line
Code:

  dt->tm_year = 0;
to return a default year of 1900 when the corresponding date entry in the FAT file system is all 0 (which is a special case for "undefined"). (As discussed further above, it is technically possible to independently set the month and/or the day to "undefined", but not the year, but it is probably okay to not distinguish between these cases.)

While it is not "wrong" to set this to 1900 (because, basically, it stands for "undefined"), in the context of the FAT filesystem this special date is normally
set to 1980 by convention. It might confuse users, if FAT dates given as
1980-00-00 under DOS and Windows are displayed as 1900-00-00
in newRPL. People might even search for such dates in order to identify files modified while the (PC) RTC wasn't set.

So, unless there are strong reasons to map this to 1900, I would suggest to map it to 1980 instead.
Code:

  dt->tm_year = 80;

Greetings,

Matthias

No strong reasons, I just changed it to 80 per your suggestion.

EDIT: (and I'll make sure to commit this time)
Find all posts by this user
Quote this message in a reply
10-18-2016, 09:36 PM (This post was last modified: 10-19-2016 06:52 PM by matthiaspaul.)
Post: #12
RE: [newRPL] Filesystem related date/time issues
(08-15-2016 12:13 PM)matthiaspaul Wrote:  
(08-15-2016 02:44 AM)Claudio L. Wrote:  Regarding the sanitization of date/time: the file system does nothing with these values, so invalid dates don't disturb its operation. I think the user code that uses these dates/times is better positioned to decide what to do with an invalid date than the file system layer (which would just mark as invalid and discard everything), so it's better not to touch them.
For example, a program might be looking for all files on a certain year, then doesn't care if the month is invalid as long as the year is good. Other program could be sorting files by date, and needs all fields to be valid. That same program could prefer to list a month 13 after the other files that year (and month 14 would go after 13 and before 15), while if we force it to zero to indicate invalid month, it would have to list it first (and there's no difference between month 13, 14 or 15).
That's right, and could be adjusted to use another special value to indicate "invalid". The reason why I proposed to use 0 is because that's what is actually used on disk by the FAT filesystem to indicate "month not set" or "day not set" (see the other comment above).

I don't think it would be a good idea to propagate invalid values (except for one "neutral" value) up to the frontend (RPL), as users are expecting "mathematical correctness" here (perhaps even doing some calendar calculations on it and just relying on that the values are either well-formatted or clearly indicated as invalid). They probably don't expect date functions to return impossibly high days and months (even if that would happen only in case the filesystem stores invalid entries - ensuring that it hasn't, is, I think, outside a user's responsibility and scope).

Being able to search for "month 13 or 14" might make sense on a lower level - for a lower level API function, perhaps. However, I'm not sure if that would still hold true on a calculator. Are you expecting people to write CHKDSK-like tools to be used on the calc?

(Sidenote: Except for the two special values of 0 for months and days, the only invalid values I have ever encountered in practise on FAT volumes were hour = 24, minute = 60 and second/2 = 30 as well as invalid days in February (but still not larger than 31. [...])
I stumbled upon an old bug-report for the original HP 49g+/50g firmware 2.09. Since it documents a problem with time stamps created by the calculator, I thought it might make sense to include it here for easier future reference:

http://bugs.hpcalc.org/show_bug.cgi?id=234
James M. Prange Wrote:FAT file system directory entries that the 49g+ and 50g write to the MMC/SD card sometimes have an incorrect file creation or last-modified time showing 60 seconds. This doesn't seem to cause any problems on the calculator itself, and MS Windows Explorer is "smart enough" to increment the minutes (it doesn't display the seconds) for these cases, but disk checking utilities such as MS ScanDisk and Norton Disk Doctor detect these errors (and optionally correct them). [...]
If you'd include timestamp validity checks at filesystem level, the existence of this bug in a "sister project" (where it cannot be fixed anymore, unfortunately) might be worth adding a special case to newRPL to cope and possibly auto-correct such timestamps (as Windows does), but otherwise it's just something having to keep in mind when developing the application-level code later on.

I didn't saw this occuring on the HP 50g myself (and didn't try to force it with 2.15 so far), but as I mentioned further up, I have observed basically the same problem (also for minutes and hours) in other not fully debugged filesystem implementations as well.

Greetings,

Matthias


--
"Programs are poems for computers."
Find all posts by this user
Quote this message in a reply
10-19-2016, 03:08 AM
Post: #13
RE: [newRPL] Filesystem related date/time issues
(10-18-2016 09:36 PM)matthiaspaul Wrote:  I stumbled upon an old bug-report for the original HP 49g+/50g firmware 2.09. Since it documents a problem with time stamps created by the calculator, I thought it might make sense to include in here for easier future reference:

http://bugs.hpcalc.org/show_bug.cgi?id=234
James M. Prange Wrote:FAT file system directory entries that the 49g+ and 50g write to the MMC/SD card sometimes have an incorrect file creation or last-modified time showing 60 seconds. This doesn't seem to cause any problems on the calculator itself, and MS Windows Explorer is "smart enough" to increment the minutes (it doesn't display the seconds) for these cases, but disk checking utilities such as MS ScanDisk and Norton Disk Doctor detect these errors (and optionally correct them). [...]
If you'd include timestamp validity checks at filesystem level, the existance of this bug in a "sister project" (where it cannot be fixed anymore, unfortunately) might be worth adding a special case to newRPL to cope and possibly auto-correct such timestamps (as Windows does), but otherwise it's just something having to keep in mind when developing the application-level code later on.

I didn't saw this occuring on the HP 50g myself (and didn't try to force it with 2.15 so far), but as I mentioned further up, I have observed basically the same problem (also for minutes and hours) in other not fully debugged filesystem implementations as well.

Greetings,

Matthias
OK, basic sanitization on read should probably be included. We'll get it done.
Find all posts by this user
Quote this message in a reply
Post Reply 




User(s) browsing this thread: 2 Guest(s)