Skip to content

Commit f859e8d

Browse files
committed
Add hts_parse_decimal() flags parameter and HTS_PARSE_THOUSANDS_SEP
hts_parse_decimal() has not yet appeared in an HTSlib release, so we can still change its signature. In the future we may add other parser flags and/or an hts_parse_region() with a flags parameter alongside the existing hts_parse_reg(). Use HTS_PARSE_THOUSANDS_SEP in hts_parse_reg() (for historical reasons), but not in regidx.c or synced_bcf_reader.c, which are used on list arguments like "REGION,REGION,REGION", where ',' separates regions and so must not be skipped as a thousands separator. Fixes samtools/bcftools#309.
1 parent b82f5ef commit f859e8d

4 files changed

Lines changed: 27 additions & 23 deletions

File tree

hts.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1824,7 +1824,7 @@ static inline long long push_digit(long long i, char c)
18241824
return 10 * i + digit;
18251825
}
18261826

1827-
long long hts_parse_decimal(const char *str, char **end)
1827+
long long hts_parse_decimal(const char *str, char **strend, int flags)
18281828
{
18291829
long long n = 0;
18301830
int decimals = 0, e = 0, lost = 0;
@@ -1837,7 +1837,7 @@ long long hts_parse_decimal(const char *str, char **end)
18371837
if (*s == '+' || *s == '-') sign = *s++;
18381838
while (*s)
18391839
if (isdigit(*s)) n = push_digit(n, *s++);
1840-
else if (*s == ',') s++;
1840+
else if (*s == ',' && (flags & HTS_PARSE_THOUSANDS_SEP)) s++;
18411841
else break;
18421842

18431843
if (*s == '.') {
@@ -1860,7 +1860,7 @@ long long hts_parse_decimal(const char *str, char **end)
18601860
fprintf(stderr, "[W::%s] discarding fractional part of %.*s\n",
18611861
__func__, (int)(s - str), str);
18621862

1863-
if (end) *end = (char *) s;
1863+
if (strend) *strend = (char *) s;
18641864
else if (*s && hts_verbose >= 2)
18651865
fprintf(stderr, "[W::%s] ignoring unknown characters after %.*s[%s]\n",
18661866
__func__, (int)(s - str), str, s);
@@ -1877,11 +1877,11 @@ const char *hts_parse_reg(const char *s, int *beg, int *end)
18771877
return s + strlen(s);
18781878
}
18791879

1880-
*beg = hts_parse_decimal(colon+1, &hyphen) - 1;
1880+
*beg = hts_parse_decimal(colon+1, &hyphen, HTS_PARSE_THOUSANDS_SEP) - 1;
18811881
if (*beg < 0) *beg = 0;
18821882

18831883
if (*hyphen == '\0') *end = INT_MAX;
1884-
else if (*hyphen == '-') *end = hts_parse_decimal(hyphen+1, NULL);
1884+
else if (*hyphen == '-') *end = hts_parse_decimal(hyphen+1, NULL, HTS_PARSE_THOUSANDS_SEP);
18851885
else return NULL;
18861886

18871887
if (*beg >= *end) return NULL;

htslib/hts.h

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -477,18 +477,22 @@ hts_idx_t *hts_idx_load2(const char *fn, const char *fnidx);
477477
int hts_idx_get_stat(const hts_idx_t* idx, int tid, uint64_t* mapped, uint64_t* unmapped);
478478
uint64_t hts_idx_get_n_no_coor(const hts_idx_t* idx);
479479

480+
481+
#define HTS_PARSE_THOUSANDS_SEP 1 ///< Ignore ',' separators within numbers
482+
480483
/// Parse a numeric string
481-
/** The number may be expressed in scientific notation, and may contain commas
482-
in the integer part (before any decimal point or E notation).
483-
@param str String to be parsed
484-
@param end If non-NULL, set on return to point to the first character
485-
in @a str after those forming the parsed number
484+
/** The number may be expressed in scientific notation, and optionally may
485+
contain commas in the integer part (before any decimal point or E notation).
486+
@param str String to be parsed
487+
@param strend If non-NULL, set on return to point to the first character
488+
in @a str after those forming the parsed number
489+
@param flags Or'ed-together combination of HTS_PARSE_* flags
486490
@return Converted value of the parsed number.
487491
488-
When @a end is NULL, a warning will be printed (if hts_verbose is 2
492+
When @a strend is NULL, a warning will be printed (if hts_verbose is 2
489493
or more) if there are any trailing characters after the number.
490494
*/
491-
long long hts_parse_decimal(const char *str, char **end);
495+
long long hts_parse_decimal(const char *str, char **strend, int flags);
492496

493497
/// Parse a "CHR:START-END"-style region string
494498
/** @param str String to be parsed

regidx.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -297,11 +297,11 @@ int regidx_parse_bed(const char *line, char **chr_beg, char **chr_end, reg_t *re
297297
*chr_end = se-1;
298298

299299
ss = se+1;
300-
reg->start = hts_parse_decimal(ss, &se);
300+
reg->start = hts_parse_decimal(ss, &se, 0);
301301
if ( ss==se ) { fprintf(stderr,"Could not parse bed line: %s\n", line); return -2; }
302302

303303
ss = se+1;
304-
reg->end = hts_parse_decimal(ss, &se) - 1;
304+
reg->end = hts_parse_decimal(ss, &se, 0) - 1;
305305
if ( ss==se ) { fprintf(stderr,"Could not parse bed line: %s\n", line); return -2; }
306306

307307
return 0;
@@ -322,15 +322,15 @@ int regidx_parse_tab(const char *line, char **chr_beg, char **chr_end, reg_t *re
322322
*chr_end = se-1;
323323

324324
ss = se+1;
325-
reg->start = hts_parse_decimal(ss, &se) - 1;
325+
reg->start = hts_parse_decimal(ss, &se, 0) - 1;
326326
if ( ss==se ) { fprintf(stderr,"Could not parse bed line: %s\n", line); return -2; }
327327

328328
if ( !se[0] || !se[1] )
329329
reg->end = reg->start;
330330
else
331331
{
332332
ss = se+1;
333-
reg->end = hts_parse_decimal(ss, &se);
333+
reg->end = hts_parse_decimal(ss, &se, 0);
334334
if ( ss==se ) reg->end = reg->start;
335335
else reg->end--;
336336
}

synced_bcf_reader.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -887,7 +887,7 @@ static bcf_sr_regions_t *_regions_init_string(const char *str)
887887
if ( *ep==':' )
888888
{
889889
sp = ep+1;
890-
from = hts_parse_decimal(sp,(char**)&ep);
890+
from = hts_parse_decimal(sp,(char**)&ep,0);
891891
if ( sp==ep )
892892
{
893893
fprintf(stderr,"[%s:%d %s] Could not parse the region(s): %s\n", __FILE__,__LINE__,__FUNCTION__,str);
@@ -906,7 +906,7 @@ static bcf_sr_regions_t *_regions_init_string(const char *str)
906906
}
907907
ep++;
908908
sp = ep;
909-
to = hts_parse_decimal(sp,(char**)&ep);
909+
to = hts_parse_decimal(sp,(char**)&ep,0);
910910
if ( *ep && *ep!=',' )
911911
{
912912
fprintf(stderr,"[%s:%d %s] Could not parse the region(s): %s\n", __FILE__,__LINE__,__FUNCTION__,str);
@@ -953,15 +953,15 @@ static int _regions_parse_line(char *line, int ichr,int ifrom,int ito, char **ch
953953
if ( i<=k ) return -1;
954954
if ( k==l )
955955
{
956-
*from = *to = hts_parse_decimal(ss, &tmp);
956+
*from = *to = hts_parse_decimal(ss, &tmp, 0);
957957
if ( tmp==ss ) return -1;
958958
}
959959
else
960960
{
961961
if ( k==ifrom )
962-
*from = hts_parse_decimal(ss, &tmp);
962+
*from = hts_parse_decimal(ss, &tmp, 0);
963963
else
964-
*to = hts_parse_decimal(ss, &tmp);
964+
*to = hts_parse_decimal(ss, &tmp, 0);
965965
if ( ss==tmp ) return -1;
966966

967967
for (i=k; i<l && *se; i++)
@@ -971,9 +971,9 @@ static int _regions_parse_line(char *line, int ichr,int ifrom,int ito, char **ch
971971
}
972972
if ( i<l ) return -1;
973973
if ( k==ifrom )
974-
*to = hts_parse_decimal(ss, &tmp);
974+
*to = hts_parse_decimal(ss, &tmp, 0);
975975
else
976-
*from = hts_parse_decimal(ss, &tmp);
976+
*from = hts_parse_decimal(ss, &tmp, 0);
977977
if ( ss==tmp ) return -1;
978978
}
979979

0 commit comments

Comments
 (0)