Skip to content

Commit 9722d22

Browse files
mgondan authored and JanWielemaker committed
CLEANUP: avoid -Wconversion, improve UTF16 support
On systems where `wchar_t` is 4 bytes (pretty much anything but Windows), convert Prolog Unicode code points > 0xffff to/from UTF-16.
1 parent 77380f9 commit 9722d22

1 file changed

Lines changed: 69 additions & 29 deletions

File tree

src/main/c/jpl.c

Lines changed: 69 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
Author: Paul Singleton, Fred Dushin and Jan Wielemaker
44
55
WWW: http://www.swi-prolog.org
6-
Copyright (c) 2004-2017, Paul Singleton
6+
Copyright (c) 2004-2026, Paul Singleton
77
All rights reserved.
88
99
Redistribution and use in source and binary forms, with or without
@@ -99,8 +99,6 @@ refactoring (trivial):
9999
#include <jni.h>
100100

101101
/* ANSI/ISO C library header (?): */
102-
#include <ctype.h>
103-
#include <errno.h>
104102
#include <stdlib.h>
105103
#include <string.h>
106104

@@ -111,6 +109,42 @@ refactoring (trivial):
111109

112110
#include <assert.h>
113111

112+
/* UTF-16 encoding/decoding. Copied from src/os/pl-utf8.h in the Prolog sources.
113+
*/
114+
115+
/* True when c lies in the range 0xD800..0xDBFF, i.e. is a UTF-16 lead
   (high) surrogate. Note that the argument is evaluated twice. */
#define IS_UTF16_LEAD(c) ((c) >= 0xD800 && (c) <= 0xDBFF)
116+
/* True when c lies in the range 0xDC00..0xDFFF, i.e. is a UTF-16 trail
   (low) surrogate. Note that the argument is evaluated twice. */
#define IS_UTF16_TRAIL(c) ((c) >= 0xDC00 && (c) <= 0xDFFF)
117+
118+
/* Combine a UTF-16 surrogate pair into a single code point.

   The offset of `lead` from 0xD800 provides the upper bits (shifted
   left by 10), the offset of `trail` from 0xDC00 the lower bits, and
   0x10000 is added to the sum. No validation of the two arguments is
   done here.
*/
static inline int
utf16_decode(int lead, int trail)
{ return ((lead-0xD800) << 10) + (trail-0xDC00) + 0x10000;
}
125+
126+
/* Split code point `c` into a UTF-16 surrogate pair.

   0x10000 is subtracted from `c`; the result shifted right by 10 plus
   0xD800 is stored in *lp (lead), and its low 10 bits plus 0xDC00 are
   stored in *tp (trail). No range check on `c` is done here.
*/
static inline void
utf16_encode(int c, int *lp, int *tp)
{ int offset = c - 0x10000;

  *lp = (offset>>10)+0xD800;
  *tp = (offset&0x3FF)+0xDC00;
}
132+
133+
static inline jchar*
134+
utf16_put_jchar(jchar *out, int chr)
135+
{ if ( chr <= 0xffff )
136+
{ *out++ = (jchar)chr;
137+
} else
138+
{ int l, t;
139+
140+
utf16_encode(chr, &l, &t);
141+
*out++ = (jchar)l;
142+
*out++ = (jchar)t;
143+
}
144+
145+
return out;
146+
}
147+
114148
/*=== JNI constants ======================================================= */
115149

116150
#define JNI_MIN_JCHAR 0
@@ -1021,25 +1055,32 @@ jni_String_to_atom(JNIEnv *env, jobject s, atom_t *a)
10211055
const jchar *jcp = (*env)->GetStringChars(env, s, NULL);
10221056

10231057
if ( s == NULL )
1024-
return FALSE;
1058+
return false;
10251059

10261060
#if SIZEOF_WCHAR_T == 2
10271061
{ *a = PL_new_atom_wchars(len, jcp); /* easy, huh? (thanks, Jan) */
10281062
}
10291063
#else
10301064
{ pl_wchar_t tmp[FASTJCHAR];
1031-
pl_wchar_t *wp;
1032-
jsize i;
1065+
pl_wchar_t *wp, *wpi;
1066+
const jchar *jcpi;
10331067

10341068
wp = len <= FASTJCHAR ? tmp : malloc(sizeof(pl_wchar_t) * len);
10351069
if ( !wp )
10361070
{ (*env)->ReleaseStringChars(env, s, jcp);
1037-
return FALSE;
1071+
return false;
1072+
}
1073+
for (jcpi=jcp, wpi=wp; jcpi - jcp < len; wpi++, jcpi++)
1074+
{ if ( IS_UTF16_LEAD(*jcpi) && IS_UTF16_TRAIL(*(jcpi+1)) &&
1075+
jcpi-jcp < len-1)
1076+
{ *wpi = utf16_decode(*jcpi, *(jcpi+1));
1077+
jcpi++;
1078+
} else
1079+
{ *wpi = *jcpi;
1080+
}
10381081
}
1039-
for (i = 0; i < len; i++)
1040-
wp[i] = jcp[i];
10411082

1042-
*a = PL_new_atom_wchars(len, wp);
1083+
*a = PL_new_atom_wchars(wpi-wp, wp);
10431084
if ( wp != tmp )
10441085
free(wp);
10451086
}
@@ -1083,22 +1124,21 @@ jni_new_wstring(JNIEnv *env, const pl_wchar_t *s, size_t len, jobject *obj)
10831124
#if SIZEOF_WCHAR_T == 2
10841125
return (*obj = (*env)->NewString(env, s, (jsize)len)) != NULL;
10851126
#else
1086-
if ( len <= FASTJCHAR )
1087-
{ jchar tmp[FASTJCHAR];
1088-
size_t i;
1127+
if ( len * 2 <= FASTJCHAR )
1128+
{ jchar tmp[FASTJCHAR], *tmpi;
1129+
const pl_wchar_t *si;
10891130

1090-
for (i = 0; i < len; i++)
1091-
tmp[i] = s[i];
1092-
1093-
*obj = (*env)->NewString(env, tmp, len);
1131+
for (tmpi=tmp, si=s; si - s < len; si++)
1132+
tmpi = utf16_put_jchar(tmpi, *s);
1133+
*obj = (*env)->NewString(env, tmp, (jsize) (tmpi-tmp));
10941134
} else
1095-
{ jchar *js;
1096-
size_t i;
1135+
{ jchar *js, *jsi;
1136+
const pl_wchar_t *si;
10971137

1098-
if ( (js=malloc(sizeof(jchar) * len)) )
1099-
{ for (i = 0; i < len; i++)
1100-
js[i] = s[i];
1101-
*obj = (*env)->NewString(env, js, len);
1138+
if ( (js=malloc(sizeof(jchar) * len * 2)) )
1139+
{ for (jsi=js, si=s; si - s < len; si++)
1140+
jsi = utf16_put_jchar(jsi, *s);
1141+
*obj = (*env)->NewString(env, js, (jsize) (jsi-js));
11021142
free(js);
11031143
}
11041144
}
@@ -1108,7 +1148,6 @@ jni_new_wstring(JNIEnv *env, const pl_wchar_t *s, size_t len, jobject *obj)
11081148
}
11091149

11101150

1111-
11121151
static bool
11131152
jni_atom_to_String(JNIEnv *env, atom_t a, jobject *s)
11141153
{ size_t len;
@@ -4043,7 +4082,7 @@ Java_org_jpl7_fli_Prolog_get_1name_1arity(
40434082
PL_get_name_arity(term, &atom, &arity) &&
40444083
jni_atom_to_String(env, atom, &jname) &&
40454084
setStringValue(env, jname_holder, jname) &&
4046-
setIntValue(env, jarity_holder, arity) );
4085+
setIntValue(env, jarity_holder, (jint) arity) ); /* dubious cast */
40474086
}
40484087

40494088
/*
@@ -4092,15 +4131,15 @@ Java_org_jpl7_fli_Prolog_new_1atom(JNIEnv *env, jclass jProlog, jstring jname)
40924131
JNIEXPORT jobject JNICALL
40934132
Java_org_jpl7_fli_Prolog_new_1functor(JNIEnv *env, jclass jProlog,
40944133
jobject jatom, /* read-only */
4095-
jint jarity)
4134+
jlong jarity)
40964135
{ atom_t atom;
40974136
functor_t functor;
40984137
jobject rval;
40994138

41004139
if ( jpl_ensure_pvm_init(env) && jarity >= 0 &&
41014140
getAtomValue(env, jatom, &atom) &&
41024141
(rval = (*env)->AllocObject(env, jFunctorT_c)) &&
4103-
(functor = PL_new_functor(atom, (int)jarity)) &&
4142+
(functor = PL_new_functor(atom, (size_t)jarity)) &&
41044143
setUIntPtrValue(env, rval, functor) )
41054144
return rval;
41064145

@@ -4270,7 +4309,7 @@ Java_org_jpl7_fli_Prolog_open_1query(JNIEnv *env, jclass jProlog,
42704309
JNIEXPORT jobject JNICALL
42714310
Java_org_jpl7_fli_Prolog_predicate(JNIEnv *env, jclass jProlog,
42724311
jstring jname, /* ought not be null */
4273-
jint jarity, /* oughta be >= 0 */
4312+
jlong jarity, /* oughta be >= 0 */
42744313
jstring jmodule /* may be null */
42754314
)
42764315
{ atom_t pname; /* the predicate's name, as an atom */
@@ -4282,7 +4321,8 @@ Java_org_jpl7_fli_Prolog_predicate(JNIEnv *env, jclass jProlog,
42824321

42834322
DEBUG(1,
42844323
Sdprintf(
4285-
">predicate(env=%p,jProlog=%p,jname=%p,jarity=%" PRId32 ",jmodule=%p)...\n",
4324+
">predicate(env=%p,jProlog=%p,jname=%p,jarity=%" PRId64
4325+
",jmodule=%p)...\n",
42864326
env, jProlog, jname, jarity, jmodule));
42874327
return (jpl_ensure_pvm_init(env) &&
42884328
jni_String_to_atom(env, jname,

0 commit comments

Comments
 (0)