Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/bin/extract_metadata.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,15 +155,15 @@ static int handle_variable_sav(int index, readstat_variable_t *variable, const c
} else if (hasPrefix(vformat, "YMDHMS16") == 0) {
// e.g. 2013-01-31 1:02
format = EXTRACT_METADATA_FORMAT_DATE_TIME;
pattern = "yyyy-MM-dd h:mm";
pattern = "yyyy-MM-dd hh:mm";
} else if (hasPrefix(vformat, "YMDHMS19") == 0) {
// e.g. 2013-01-31 1:02:33
format = EXTRACT_METADATA_FORMAT_DATE_TIME;
pattern = "yyyy-MM-dd h:mm:ss";
pattern = "yyyy-MM-dd hh:mm:ss";
} else if (hasPrefix(vformat, "YMDHMS19.2") == 0) {
// e.g. 2013-01-31 1:02:33.72
format = EXTRACT_METADATA_FORMAT_DATE_TIME;
pattern = "yyyy-MM-dd h:mm:ss.SS+";
pattern = "yyyy-MM-dd hh:mm:ss.SS+";
} else if (hasPrefix(vformat, "MTIME5") == 0) {
// e.g. 1754:36
format = EXTRACT_METADATA_FORMAT_TIME;
Expand Down
1 change: 1 addition & 0 deletions src/bin/read_csv/csv_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ typedef struct csv_metadata {
void *user_ctx;
readstat_variable_t *variables;
int* is_date;
int* is_date_time;
struct json_metadata *json_md;
rs_read_module_t *output_module;
} csv_metadata;
100 changes: 99 additions & 1 deletion src/bin/read_csv/mod_dta.c
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ void produce_column_header_dta(void *csv_metadata, const char *column, readstat_
break;
case EXTRACT_METADATA_FORMAT_TIME:
case EXTRACT_METADATA_FORMAT_DATE_TIME:
var->type = READSTAT_TYPE_INT32;
var->type = READSTAT_TYPE_DOUBLE;
snprintf(var->format, sizeof(var->format), "%s", "%tC");
// %tC => is equivalent to coordinated universal time (UTC)
break;
Expand Down Expand Up @@ -385,17 +385,115 @@ static readstat_value_t value_double_dta(const char *s, size_t len, struct csv_m
return value;
}

/*
 * Convert a CSV date-time cell ("yyyy-mm-dd hh:MM:SS[.fff...]") into a
 * READSTAT_TYPE_DOUBLE value holding a millisecond count suitable for the
 * "%tC" display format (leap seconds included).
 *
 * s   - cell text; only the first `len` bytes are consulted
 * len - number of bytes in the cell
 * c   - CSV parsing context (unused here; kept for signature parity with the
 *       other value_* converters)
 *
 * Returns a system-missing double for NULL/empty input. On malformed or
 * out-of-range input, prints a diagnostic to stderr and terminates the
 * process with EXIT_FAILURE.
 */
static readstat_value_t value_double_date_time_dta(const char *s, size_t len, struct csv_metadata *c) {
    (void)c;

    // Handle empty or NULL strings as missing values
    if (s == NULL || len == 0 || *s == '\0') {
        readstat_value_t missing = {
            .type = READSTAT_TYPE_DOUBLE,
            .is_system_missing = 1,
            .v = { .double_value = NAN }
        };
        return missing;
    }

    // Keep at most 23 characters ("yyyy-mm-dd hh:MM:SS.fff"): anything beyond
    // that is microseconds or a timezone offset, neither of which is kept.
    // The copy is bounded by `len` so it never reads past the cell, even if
    // the caller's buffer is not NUL-terminated.
    char date_time[24];
    const size_t max_chars = sizeof(date_time) - 1;
    const int copy_len = (int)(len < max_chars ? len : max_chars);
    snprintf(date_time, sizeof(date_time), "%.*s", copy_len, s);

    // Parse the mandatory components; %n records where the seconds field ends
    // so the optional fraction can be read digit by digit afterwards.
    int year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0;
    int consumed = 0;
    int matched = sscanf(
        date_time,
        "%d-%d-%d %d:%d:%d%n",
        &year, &month, &day, &hour, &minute, &second, &consumed
    );

    // Range checks also guarantee the "%04d-%02d-%02d" buffer below cannot be
    // truncated. second == 60 is allowed so a leap second itself can be given.
    int valid = matched == 6
        && year >= 0 && year <= 9999
        && month >= 1 && month <= 12
        && day >= 1 && day <= 31
        && hour >= 0 && hour <= 23
        && minute >= 0 && minute <= 59
        && second >= 0 && second <= 60;
    if (!valid) {
        fprintf(stderr, "%s:%d not a valid date-time: %s (expected format: yyyy-mm-dd hh:MM:SS with optional milliseconds. Datetime string is truncated at 23 characters to ignore microseconds and timezone information.)\n", __FILE__, __LINE__, date_time);
        exit(EXIT_FAILURE);
    }

    // Optional fractional seconds. The digits are a decimal fraction, so
    // ".72" means 720 ms and ".5" means 500 ms; digits beyond the third
    // (sub-millisecond precision) are dropped.
    int msecs = 0;
    if (date_time[consumed] == '.') {
        int scale = 100;
        for (const char *p = date_time + consumed + 1; scale > 0 && *p >= '0' && *p <= '9'; p++) {
            msecs += (*p - '0') * scale;
            scale /= 10;
        }
    }

    // Get the day count for the date part from the shared helper.
    // NOTE(review): presumably days since Stata's 1960-01-01 epoch; the
    // helper's failure signalling via `dest` is not visible here -- confirm.
    char days_since_epoch_string[11];
    snprintf(days_since_epoch_string, sizeof(days_since_epoch_string), "%04d-%02d-%02d", year, month, day);
    char *dest = NULL;
    int days_since_epoch = readstat_dta_num_days(days_since_epoch_string, &dest);

    // Add the hours, minutes, seconds and milliseconds to the days
    double msecs_since_epoch = 86400000.0 * days_since_epoch
        + hour * 3600000.0
        + minute * 60000.0
        + second * 1000.0
        + msecs * 1.0;

    // Adjust for leap seconds; 27 have occurred as of writing this code
    // https://en.m.wikipedia.org/wiki/Leap_second
    // Each entry is the calendar day whose last second was the leap second.
    typedef struct {
        int year;
        int month;
        int day;
    } leap_second_date;

    static const leap_second_date leap_seconds[] = {
        {1972, 6, 30}, {1972, 12, 31},
        {1973, 12, 31},
        {1974, 12, 31},
        {1975, 12, 31},
        {1976, 12, 31},
        {1977, 12, 31},
        {1978, 12, 31},
        {1979, 12, 31},
        {1981, 6, 30},
        {1982, 6, 30},
        {1983, 6, 30},
        {1985, 6, 30},
        {1987, 12, 31},
        {1989, 12, 31},
        {1990, 12, 31},
        {1992, 6, 30},
        {1993, 6, 30},
        {1994, 6, 30},
        {1995, 12, 31},
        {1997, 6, 30},
        {1998, 12, 31},
        {2005, 12, 31},
        {2008, 12, 31},
        {2012, 6, 30},
        {2015, 6, 30},
        {2016, 12, 31}
    };
    const int leap_second_count = (int)(sizeof(leap_seconds) / sizeof(leap_seconds[0]));

    // Count every leap second that lies strictly before the given date
    int leap_seconds_to_add = 0;
    for (int i = 0; i < leap_second_count; i++) {
        const leap_second_date *ls = &leap_seconds[i];
        if (
            (year > ls->year) ||
            (year == ls->year && month > ls->month) ||
            (year == ls->year && month == ls->month && day > ls->day)
        ) {
            leap_seconds_to_add++;
        }
    }
    msecs_since_epoch += leap_seconds_to_add * 1000.0;

    readstat_value_t value = {
        .type = READSTAT_TYPE_DOUBLE,
        .v = { .double_value = msecs_since_epoch }
    };

    return value;
}

void produce_csv_value_dta(void *csv_metadata, const char *s, size_t len) {
struct csv_metadata *c = (struct csv_metadata *)csv_metadata;
readstat_variable_t *var = &c->variables[c->columns];
int is_date = c->is_date[c->columns];
int is_date_time = c->is_date_time[c->columns];
int obs_index = c->rows - 1; // TODO: ???
readstat_value_t value;

if (len == 0) {
value = value_sysmiss(s, len, c);
} else if (is_date) {
value = value_int32_date_dta(s, len, c);
} else if (is_date_time) {
value = value_double_date_time_dta(s, len, c);
} else if (var->type == READSTAT_TYPE_DOUBLE) {
value = value_double_dta(s, len, c);
} else if (var->type == READSTAT_TYPE_STRING) {
Expand Down
6 changes: 6 additions & 0 deletions src/bin/read_csv/read_csv.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ static void produce_column_header(struct csv_metadata *c, void *s, size_t len) {

extract_metadata_format_t colformat = column_format(c->json_md, column);
c->is_date[c->columns] = colformat == EXTRACT_METADATA_FORMAT_DATE;
c->is_date_time[c->columns] = colformat == EXTRACT_METADATA_FORMAT_DATE_TIME;
if (c->output_module->header) {
c->output_module->header(c, column, var);
}
Expand Down Expand Up @@ -75,6 +76,7 @@ static void csv_metadata_cell(void *s, size_t len, void *data)
if (c->rows == 0) {
c->variables = realloc(c->variables, (c->columns+1) * sizeof(readstat_variable_t));
c->is_date = realloc(c->is_date, (c->columns+1) * sizeof(int));
c->is_date_time = realloc(c->is_date_time, (c->columns+1) * sizeof(int));
produce_column_header(c, s, len);
} else if (c->rows >= 1 && c->handle.value && c->output_module->csv_value) {
c->output_module->csv_value(c, s, len);
Expand Down Expand Up @@ -184,6 +186,10 @@ readstat_error_t readstat_parse_csv(readstat_parser_t *parser,
free(md->is_date);
md->is_date = NULL;
}
if (md->is_date_time) {
free(md->is_date_time);
md->is_date_time = NULL;
}
csv_free(p);
io->close(io->io_ctx);
return retval;
Expand Down
Loading