diff --git a/R/fread.R b/R/fread.R index 16a72ed24d..608984fad0 100644 --- a/R/fread.R +++ b/R/fread.R @@ -182,39 +182,8 @@ yaml=FALSE, tmpdir=tempdir(), tz="UTC") call_args = names(match.call()) if (is.character(skip)) warningf("Combining a search string as 'skip' and reading a YAML header may not work as expected -- currently, reading will proceed to search for 'skip' from the beginning of the file, NOT from the end of the metadata; please file an issue on GitHub if you'd like to see more intuitive behavior supported.") - # create connection to stream header lines from file: - # https://stackoverflow.com/questions/9871307 - f = base::file(input, 'r') - first_line = readLines(f, n=1L) - n_read = 1L - yaml_border_re = '^#?---' - if (!grepl(yaml_border_re, first_line)) { - close(f) - stopf( - 'Encountered <%s%s> at the first unskipped line (%d), which does not constitute the start to a valid YAML header (expecting something matching regex "%s"); please check your input and try again.', - substr(first_line, 1L, 50L), if (nchar(first_line) > 50L) '...' else '', 1L+skip, yaml_border_re - ) - } - - yaml_comment_re = '^#' - yaml_string = character(0L) - repeat { - this_line = readLines(f, n=1L) - n_read = n_read + 1L - if (!length(this_line)){ - close(f) - stopf('Reached the end of the file before finding a completion to the YAML header. A valid YAML header is bookended by lines matching the regex "%s". Please double check the input file is a valid csvy.', yaml_border_re) - } - if (grepl(yaml_border_re, this_line)) break - if (grepl(yaml_comment_re, this_line)) - this_line = sub(yaml_comment_re, '', this_line) - yaml_string = paste(yaml_string, this_line, sep='\n') - } - close(f) # when #561 is implemented, no need to close f. 
# nocov start. Covered only in other.Rraw
# Read and parse the YAML metadata header found at the top of a CSVY input.
#
# The header is the run of lines enclosed by two border lines matching '^#?---'.
# A leading '#' on any enclosed line is stripped before the text is parsed.
#
# f:       description of the input, passed as-is to base::file(f, 'r').
# skip:    the caller's 'skip' value. It is only used to report a line number in the
#          error raised when the first line is not a YAML border; reading here always
#          starts at the first line of 'f'.
# verbose: if TRUE, report how many lines were consumed and the top-level fields found.
#
# Returns whatever yaml::yaml.load() produces for the header text.
# Stops if the first line is not a YAML border, or if the input ends before a closing
# border is found.
.read_yaml_header = function(f, skip, verbose) {
  # create connection to stream header lines from file:
  # https://stackoverflow.com/questions/9871307
  con = base::file(f, 'r')
  # NB: close() won't be right if 'f' itself can be a file connection (#561)
  # add=TRUE so this handler never displaces another exit handler added later
  on.exit(close(con), add=TRUE)

  first_line = readLines(con, n=1L)
  # an empty input yields character(0), which would make the if() below fail with
  #   'argument is of length zero'; treat it as an empty line to reach the intended error
  if (!length(first_line)) first_line = ''
  n_read = 1L
  yaml_border_re = '^#?---'
  if (!grepl(yaml_border_re, first_line)) {
    # 'skip' may be a search string (the caller only warns about that case); doing
    #   arithmetic on it would raise an unrelated error and mask the message below
    first_line_no = if (is.numeric(skip)) 1L + skip else 1L
    stopf(
      'Encountered <%s%s> at the first unskipped line (%d), which does not constitute the start to a valid YAML header (expecting something matching regex "%s"); please check your input and try again.',
      substr(first_line, 1L, 50L), if (nchar(first_line) > 50L) '...' else '', first_line_no, yaml_border_re
    )
  }

  yaml_comment_re = '^#'
  yaml_string = character(0L)
  repeat {
    this_line = readLines(con, n=1L)
    # counts every read attempt, so the closing border line is included in n_read
    n_read = n_read + 1L
    if (!length(this_line)){
      stopf('Reached the end of the file before finding a completion to the YAML header. A valid YAML header is bookended by lines matching the regex "%s". Please double check the input file is a valid csvy.', yaml_border_re)
    }
    if (grepl(yaml_border_re, this_line)) break
    # header lines may be commented out with a single leading '#'; drop it before parsing
    if (grepl(yaml_comment_re, this_line))
      this_line = sub(yaml_comment_re, '', this_line)
    yaml_string = paste(yaml_string, this_line, sep='\n')
  }

  yaml_header = yaml::yaml.load(yaml_string)
  if (verbose) catf('Processed %d lines of YAML metadata with the following top-level fields: %s\n', n_read, brackify(names(yaml_header)))
  yaml_header
}
# nocov end.