From 9e17f24967dcbe532f522627f57d18ad2cbee005 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Nov 05 2013 11:09:29 +0000 Subject: bound checking bug with caml_string_{get,set}{16,32,64}: fix the runtime C code As notified by Nicolas Trangez, the following program behaves in an unexpected way by returning 0 instead of failing with an out-of-bound exception: external get16 : string -> int -> int = "%caml_string_get16" let test = get16 "" 0 caml_string_get16(str, idx) will access indices (idx) and (idx+1). The bound-checking code is currently implemented as: if (idx < 0 || idx >= caml_string_length(str) - 1) caml_array_bound_error(); This is wrong as caml_string_length returns an mlsize_t which is unsigned, so substracting 1 gets buggy when the size is 0. The test should be written as follow: if (idx < 0 || idx + 1 >= caml_string_length(str)) caml_array_bound_error(); Note 1: we can exploit this bug to make out-of-bound access to a string, but I think get16 examples will run into the padding characters of OCaml strings and behave in a not-too-wrong way. It may also be the case of get32, but get64 will access 7 bytes, so access memory outside the string: # external set64: string -> int -> int -> unit = "%caml_string_get64";; external set64 : string -> int -> int -> unit = "%caml_string_get64" # set64 "" 0 0;; Segmentation fault Note 2: this first commit only fixes the C code in byterun/str.c. Only ocamlc actually uses these functions when the compiler primitive is used ("%caml_string_get16" instead of "caml_string_get16"). ocamlopt generates ocaml code directly, and this part has yet to be fixed in a following commit. git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@14267 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 --- diff --git a/byterun/str.c b/byterun/str.c index 9a96147..9e157a8 100644 --- a/byterun/str.c +++ b/byterun/str.c @@ -68,7 +68,7 @@ CAMLprim value caml_string_get16(value str, value index) intnat res; unsigned char b1, b2; intnat idx = Long_val(index); - if (idx < 0 || idx >= caml_string_length(str) - 1) caml_array_bound_error(); + if (idx < 0 || idx + 1 >= caml_string_length(str)) caml_array_bound_error(); b1 = Byte_u(str, idx); b2 = Byte_u(str, idx + 1); #ifdef ARCH_BIG_ENDIAN @@ -84,7 +84,7 @@ CAMLprim value caml_string_get32(value str, value index) intnat res; unsigned char b1, b2, b3, b4; intnat idx = Long_val(index); - if (idx < 0 || idx >= caml_string_length(str) - 3) caml_array_bound_error(); + if (idx < 0 || idx + 3 >= caml_string_length(str)) caml_array_bound_error(); b1 = Byte_u(str, idx); b2 = Byte_u(str, idx + 1); b3 = Byte_u(str, idx + 2); @@ -109,7 +109,7 @@ CAMLprim value caml_string_get64(value str, value index) uint32 reslo; unsigned char b1, b2, b3, b4, b5, b6, b7, b8; intnat idx = Long_val(index); - if (idx < 0 || idx >= caml_string_length(str) - 7) caml_array_bound_error(); + if (idx < 0 || idx + 7 >= caml_string_length(str)) caml_array_bound_error(); b1 = Byte_u(str, idx); b2 = Byte_u(str, idx + 1); b3 = Byte_u(str, idx + 2); @@ -133,7 +133,7 @@ CAMLprim value caml_string_set16(value str, value index, value newval) unsigned char b1, b2; intnat val; intnat idx = Long_val(index); - if (idx < 0 || idx >= caml_string_length(str) - 1) caml_array_bound_error(); + if (idx < 0 || idx + 1 >= caml_string_length(str)) caml_array_bound_error(); val = Long_val(newval); #ifdef ARCH_BIG_ENDIAN b1 = 0xFF & val >> 8; @@ -152,7 +152,7 @@ CAMLprim value caml_string_set32(value str, value index, value newval) unsigned char b1, b2, b3, b4; intnat val; intnat idx = Long_val(index); - if (idx < 0 || idx >= caml_string_length(str) - 3) caml_array_bound_error(); + if (idx < 0 || idx + 3 >= caml_string_length(str)) caml_array_bound_error(); val = Int32_val(newval); #ifdef ARCH_BIG_ENDIAN b1 = 0xFF & val >> 24; @@ -178,7 +178,7 @@ CAMLprim value caml_string_set64(value str, value index, value newval) uint32 lo,hi; int64 val; intnat idx = Long_val(index); - if (idx < 0 || idx >= caml_string_length(str) - 7) caml_array_bound_error(); + if (idx < 0 || idx + 7 >= caml_string_length(str)) caml_array_bound_error(); val = Int64_val(newval); I64_split(val,hi,lo); #ifdef ARCH_BIG_ENDIAN