Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/nightly' into optimize-strings…
Browse files Browse the repository at this point in the history
…liceiter
  • Loading branch information
martinvuyk committed Nov 8, 2024
2 parents ae2fa12 + 5180ef0 commit d8ab4a0
Show file tree
Hide file tree
Showing 21 changed files with 1,480 additions and 859 deletions.
3 changes: 3 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,9 @@ what we publish.
of variables that are handled as synthetic types, e.g. `List` from Mojo or
`std::vector` from C++.

- Added `os.path.expandvars` to expand environment variables in a string.
([PR #3735](https://github.com/modularml/mojo/pull/3735) by [@thatstoasty](https://github.com/thatstoasty)).

### 🦋 Changed

- More things have been removed from the auto-exported set of entities in the `prelude`
Expand Down
519 changes: 314 additions & 205 deletions examples/magic.lock

Large diffs are not rendered by default.

628 changes: 359 additions & 269 deletions examples/notebooks/magic.lock

Large diffs are not rendered by default.

555 changes: 341 additions & 214 deletions magic.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ benchmarks = { cmd = ["./stdlib/scripts/run-benchmarks.sh"], env = { MODULAR_MOJ

[dependencies]
python = ">=3.9,<3.13"
max = "*"
lit = "*"
max = "*"
3 changes: 2 additions & 1 deletion stdlib/src/builtin/_pybind.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ from python._bindings import (
ConvertibleFromPython,
PythonableAndConvertibleFromPython,
PyMojoObject,
python_type_object,
py_c_function_wrapper,
check_argument_type,
# Imported for use by the compiler
Expand Down Expand Up @@ -85,7 +86,7 @@ fn gen_pytype_wrapper[
# TODO(MOCO-1302): Add support for generating member field as computed properties.
# TODO(MOCO-1307): Add support for constructor generation.

var type_obj = PyMojoObject[T].python_type_object[name](
var type_obj = python_type_object[T, name](
methods=List[PyMethodDef](),
)

Expand Down
16 changes: 6 additions & 10 deletions stdlib/src/builtin/format_int.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -322,13 +322,12 @@ fn _try_write_int[
# Construct a null-terminated buffer of single-byte char.
var zero_buf = InlineArray[UInt8, 2](zero_char, 0)

# TODO(MSTDL-720):
# Support printing non-null-terminated strings on GPU and switch
# back to this code without a workaround.
# ptr=digit_chars_array,
var zero = StringSlice[ImmutableAnyOrigin](
# TODO(MSTDL-720):
# Support printing non-null-terminated strings on GPU and switch
# back to this code without a workaround.
# unsafe_from_utf8_ptr=digit_chars_array,
unsafe_from_utf8_ptr=zero_buf.unsafe_ptr(),
len=1,
ptr=zero_buf.unsafe_ptr(), length=1
)
writer.write(zero)

Expand Down Expand Up @@ -404,10 +403,7 @@ fn _try_write_int[

# SAFETY:
# Create a slice to only those bytes in `buf` that have been initialized.
var str_slice = StringSlice[__origin_of(buf)](
unsafe_from_utf8_ptr=buf_ptr,
len=len,
)
var str_slice = StringSlice[__origin_of(buf)](ptr=buf_ptr, length=len)

writer.write(str_slice)

Expand Down
124 changes: 68 additions & 56 deletions stdlib/src/builtin/simd.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -562,14 +562,7 @@ struct SIMD[type: DType, size: Int](
constrained[type.is_numeric(), "the SIMD type must be numeric"]()

@parameter
if _is_sm_9x() and type is DType.bfloat16:
return _call_ptx_intrinsic[
scalar_instruction="add.rn.bf16",
vector2_instruction="add.rn.bf16x2",
scalar_constraints="=h,h,h",
vector_constraints="=r,r,r",
](self, rhs)
elif _is_sm_8x() and type.is_half_float():
if _is_sm_8x() and type.is_half_float():
return self.fma(1, rhs)

return __mlir_op.`pop.add`(self.value, rhs.value)
Expand Down Expand Up @@ -617,13 +610,6 @@ struct SIMD[type: DType, size: Int](
return (rebind[Self._Mask](self) & rebind[Self._Mask](rhs)).cast[
type
]()
elif _is_sm_9x() and type is DType.bfloat16:
return _call_ptx_intrinsic[
scalar_instruction="mul.rn.bf16",
vector2_instruction="mul.rn.bf16x2",
scalar_constraints="=h,h,h",
vector_constraints="=r,r,r",
](self, rhs)
elif _is_sm_8x() and type.is_half_float():
return self.fma(rhs, -0.0)

Expand Down Expand Up @@ -1597,54 +1583,73 @@ struct SIMD[type: DType, size: Int](
@parameter
if type == target:
return rebind[SIMD[target, size]](self)
elif (
triple_is_nvidia_cuda()
and type is DType.float32
and target is DType.bfloat16
and size >= 2
):
var res = SIMD[target, size]()

@parameter
if triple_is_nvidia_cuda():

@parameter
for i in range(0, size, 2):
var bf16x2_as_uint32 = inlined_assembly[
"cvt.rn.bf16x2.f32 $0, $1, $2;",
UInt32,
constraints="=r,f,f",
has_side_effect=False,
](rebind[Float32](self[i + 1]), rebind[Float32](self[i]))
res = res.insert[offset=i](bitcast[target, 2](bf16x2_as_uint32))
if size > 1 and type is DType.float32 and target.is_half_float():
# For size == 1, the LLVM backend generates the correct `cvt.rn.f16.f32`
# instruction. This is why we do not handle it here.
alias vector_asm_prefix = "cvt.rn.f16x2.f32" if target is DType.float16 else "cvt.rn.bf16x2.f32"
var res = SIMD[target, size]()

return res
@parameter
for i in range(0, size, 2):
var bf16x2_as_uint32 = inlined_assembly[
vector_asm_prefix + " $0, $1, $2;",
UInt32,
constraints="=r,f,f",
has_side_effect=False,
](
rebind[Float32](self[i + 1]),
rebind[Float32](self[i]),
)
res = res.insert[offset=i](
bitcast[target, 2](bf16x2_as_uint32)
)

elif has_neon() and (
type is DType.bfloat16 or target == DType.bfloat16
):
return res

elif type is DType.bfloat16 and target is DType.float64:
# Convert to F64 via a Float32 pathway. This would allow us to
# use the optimizations defined above.
return self.cast[DType.float32]().cast[target]()

@parameter
if has_neon() and (type is DType.bfloat16 or target == DType.bfloat16):
# TODO(KERN-228): support BF16 on neon systems.
return _unchecked_zero[target, size]()
elif type is DType.bool:

@parameter
if type is DType.bool:
return self.select(SIMD[target, size](1), SIMD[target, size](0))
elif target == DType.bool:

@parameter
if target == DType.bool:
return rebind[SIMD[target, size]](self != 0)
elif type is DType.bfloat16 and not _has_native_bf16_support():
var cast_result = _bfloat16_to_f32(
rebind[SIMD[DType.bfloat16, size]](self)
).cast[target]()
return rebind[SIMD[target, size]](cast_result)
elif target == DType.bfloat16 and not _has_native_bf16_support():

@parameter
if type is DType.bfloat16 and target is DType.float32:
return rebind[SIMD[target, size]](
_bfloat16_to_f32(rebind[SIMD[DType.bfloat16, size]](self))
)

@parameter
if type is DType.float32 and target == DType.bfloat16:
return rebind[SIMD[target, size]](
_f32_to_bfloat16(self.cast[DType.float32]())
)
else:
return __mlir_op.`pop.cast`[
_type = __mlir_type[
`!pop.simd<`,
size.value,
`, `,
target.value,
`>`,
]
](self.value)

return __mlir_op.`pop.cast`[
_type = __mlir_type[
`!pop.simd<`,
size.value,
`, `,
target.value,
`>`,
]
](self.value)

@no_inline
fn write_to[W: Writer](self, inout writer: W):
Expand Down Expand Up @@ -2996,10 +3001,17 @@ fn _bfloat16_to_f32_scalar(
# TODO(KERN-228): support BF16 on neon systems.
return _unchecked_zero[DType.float32, 1]()

var bfloat_bits = FPUtils[DType.bfloat16].bitcast_to_integer(val)
return FPUtils[DType.float32].bitcast_from_integer(
bfloat_bits << _fp32_bf16_mantissa_diff
)
# For bfloat16, we can just do a memcpy to perform the cast to float32.
@parameter
if triple_is_nvidia_cuda():
return inlined_assembly[
"cvt.f32.bf16 $0, $1;" if _is_sm_9x() else "mov.b32 $0, {0, $1};",
Scalar[DType.float32],
constraints="=f,h",
has_side_effect=False,
](bitcast[DType.int16](val))

return bitcast[DType.float32, 1](SIMD[DType.bfloat16, 2](0, val))


@always_inline
Expand Down
4 changes: 1 addition & 3 deletions stdlib/src/builtin/string_literal.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -444,9 +444,7 @@ struct StringLiteral(
# FIXME(MSTDL-160):
# Enforce UTF-8 encoding in StringLiteral so this is actually
# guaranteed to be valid.
return StaticString(
unsafe_from_utf8_ptr=self.unsafe_ptr(), len=self.byte_length()
)
return StaticString(ptr=self.unsafe_ptr(), length=self.byte_length())

@always_inline
fn as_bytes(self) -> Span[Byte, StaticConstantOrigin]:
Expand Down
18 changes: 9 additions & 9 deletions stdlib/src/collections/string.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -1118,9 +1118,9 @@ struct String(
var rhs_ptr = rhs.unsafe_ptr()
alias S = StringSlice[ImmutableAnyOrigin]
if lhs_len == 0:
return String(S(unsafe_from_utf8_ptr=rhs_ptr, len=rhs_len))
return String(S(ptr=rhs_ptr, length=rhs_len))
elif rhs_len == 0:
return String(S(unsafe_from_utf8_ptr=lhs_ptr, len=lhs_len))
return String(S(ptr=lhs_ptr, length=lhs_len))
var sum_len = lhs_len + rhs_len
var buffer = Self._buffer_type(capacity=sum_len + 1)
var ptr = buffer.unsafe_ptr()
Expand Down Expand Up @@ -1211,7 +1211,7 @@ struct String(
var o_ptr = other.unsafe_ptr()
if s_len == 0:
alias S = StringSlice[ImmutableAnyOrigin]
self = String(S(unsafe_from_utf8_ptr=o_ptr, len=o_len))
self = String(S(ptr=o_ptr, length=o_len))
return
elif o_len == 0:
return
Expand Down Expand Up @@ -2031,12 +2031,12 @@ struct String(
"""
if end == -1:
return StringSlice[__origin_of(self)](
unsafe_from_utf8_ptr=self.unsafe_ptr() + start,
len=self.byte_length() - start,
ptr=self.unsafe_ptr() + start,
length=self.byte_length() - start,
).startswith(prefix.as_string_slice())

return StringSlice[__origin_of(self)](
unsafe_from_utf8_ptr=self.unsafe_ptr() + start, len=end - start
ptr=self.unsafe_ptr() + start, length=end - start
).startswith(prefix.as_string_slice())

fn endswith(self, suffix: String, start: Int = 0, end: Int = -1) -> Bool:
Expand All @@ -2053,12 +2053,12 @@ struct String(
"""
if end == -1:
return StringSlice[__origin_of(self)](
unsafe_from_utf8_ptr=self.unsafe_ptr() + start,
len=self.byte_length() - start,
ptr=self.unsafe_ptr() + start,
length=self.byte_length() - start,
).endswith(suffix.as_string_slice())

return StringSlice[__origin_of(self)](
unsafe_from_utf8_ptr=self.unsafe_ptr() + start, len=end - start
ptr=self.unsafe_ptr() + start, length=end - start
).endswith(suffix.as_string_slice())

fn removeprefix(self, prefix: String, /) -> String:
Expand Down
2 changes: 1 addition & 1 deletion stdlib/src/os/__init__.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"""Implements the os package."""

from .atomic import Atomic
from .env import getenv, setenv
from .env import getenv, setenv, unsetenv
from .fstat import lstat, stat, stat_result
from .os import (
SEEK_CUR,
Expand Down
19 changes: 18 additions & 1 deletion stdlib/src/os/env.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ from os import setenv
```
"""

from sys import external_call, os_is_linux, os_is_macos
from sys import external_call, os_is_linux, os_is_macos, os_is_windows
from sys.ffi import c_int

from memory import UnsafePointer
from utils import StringRef
Expand Down Expand Up @@ -51,6 +52,22 @@ fn setenv(name: String, value: String, overwrite: Bool = True) -> Bool:
return status == 0


fn unsetenv(name: String) -> Bool:
    """Removes a variable from the process environment.

    The request is forwarded to the external `unsetenv` symbol. Building for
    Windows is rejected by the `constrained` check below.

    Args:
        name: The name of the environment variable to remove.

    Returns:
        True when the underlying `unsetenv` call reports a zero status,
        False otherwise.
    """
    constrained[
        not os_is_windows(), "operating system must be Linux or macOS"
    ]()

    # A zero status from `unsetenv` is treated as success; any other value
    # is reported to the caller as failure.
    var status = external_call["unsetenv", c_int](name.unsafe_ptr())
    return status == 0


fn getenv(name: String, default: String = "") -> String:
"""Returns the value of the given environment variable.
Expand Down
1 change: 1 addition & 0 deletions stdlib/src/os/path/__init__.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ from .path import (
dirname,
exists,
expanduser,
expandvars,
getsize,
isdir,
isfile,
Expand Down
Loading

0 comments on commit d8ab4a0

Please sign in to comment.