-
-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Contribute Morphuntion from Apple as open source software #35
Open
grhoten
wants to merge
3
commits into
main
Choose a base branch
from
morphuntion
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# | ||
# Copyright 2016-2024 Apple Inc. All rights reserved. | ||
# | ||
# Metadata directory | ||
.DS_Store | ||
.gradle | ||
.idea | ||
# Temporary build directory | ||
build | ||
dist | ||
# Generated documentation | ||
docs/headers | ||
docs/html | ||
docs/xml | ||
docs/pages | ||
# Vim | ||
*.swp | ||
# Cmake ignores | ||
CMakeLists.txt.user | ||
CMakeCache.txt | ||
CMakeFiles | ||
CMakeScripts | ||
CMakeBuild* | ||
cmake-build-* | ||
options.mk | ||
# Gradle | ||
gradle/wrapper/gradle-wrapper.jar |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
# | ||
# Copyright 2018-2024 Apple Inc. All rights reserved. | ||
# | ||
cmake_minimum_required(VERSION 3.24)
include(ExternalProject)
include(CheckCXXCompilerFlag)

# Resolve the helper modules and .mk files relative to this listfile instead of
# CMAKE_SOURCE_DIR. At top level both are the same directory, but
# CMAKE_SOURCE_DIR points at the parent project when this tree is consumed via
# add_subdirectory(). PROJECT_SOURCE_DIR cannot be used here because project()
# has not been called yet.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake")
set(VERSIONS_MK_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/query_versions.mk")
set(OPTIONS_MK_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/build_options.mk")
include(morphuntionMacros)

# Group targets into folders in IDE generators and silence per-file install output.
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
set(CMAKE_INSTALL_MESSAGE NEVER)

# Configure number of processors.
# get_num_processors() and morphuntion_debug_vars() are helpers that are not
# defined in this file (presumably provided by morphuntionMacros -- confirm).
get_num_processors(NUM_PROCESSORS_VAL)
morphuntion_debug_vars(NUM_PROCESSORS_VAL)

# Morphuntion cache variables
# The detected value is only the default; a user-supplied -DNUM_PROCESSORS wins.
set(NUM_PROCESSORS ${NUM_PROCESSORS_VAL} CACHE STRING "Number of cores to be used in make")

# Morphuntion options
option(PROFILING "Turn on code profiling" OFF)

# Require C++20 without compiler-specific extensions (-std=c++20, not gnu++20).
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Equivalent to -fvisibility=hidden flag
set(CMAKE_CXX_VISIBILITY_PRESET hidden)

# Morphuntion version
# TAG_PREFIX is set immediately before the version query; presumably
# get_morphuntion_version() reads it -- confirm in morphuntionMacros.
set(TAG_PREFIX Morphuntion)
get_morphuntion_version(MORPHUNTION_VERSION_TAG)
set(MORPHUNTION_VERSION ${MORPHUNTION_VERSION_TAG} CACHE STRING "Version of morphuntion to be used in publishing")
morphuntion_debug_vars(MORPHUNTION_VERSION)

# Declare morphuntion project
project(
  Morphuntion
  LANGUAGES C CXX
)
# Pin the library install directory to "lib" before GNUInstallDirs is included,
# so the module keeps this value instead of choosing its own default.
set(CMAKE_INSTALL_LIBDIR lib)
include(GNUInstallDirs)

# Optionally compile with code profiling
if(PROFILING)
  message("-- PROFILING TURNED ON")
  # The same instrumentation flags are needed at compile time and at link time.
  add_compile_options(-g -fprofile-instr-generate -fcoverage-mapping)
  add_link_options(-g -fprofile-instr-generate -fcoverage-mapping)
endif()

# Set these warning properties on a project level
add_compile_options(-Wall -Wextra)

# Add link time optimization for the MinSizeRel and Release configurations.
# A generator expression is used instead of testing CMAKE_BUILD_TYPE so that
# the flag is also applied per-configuration under multi-config generators
# (Xcode, Ninja Multi-Config), where CMAKE_BUILD_TYPE is empty at configure
# time. Note that no platform check is applied here.
add_compile_options("$<$<CONFIG:MinSizeRel,Release>:-flto>")
add_link_options("$<$<CONFIG:MinSizeRel,Release>:-flto>")
|
||
# Set Morphuntion include, data directories
# Headers and data are staged inside the build tree. The data root mirrors the
# final install layout (<install prefix>/<datadir>) underneath the staging prefix.
set(MORPHUNTION_INCLUDE_ROOT ${CMAKE_BINARY_DIR}/morphuntion_headers)
set(MORPHUNTION_DATA_ROOT_PREFIX ${CMAKE_BINARY_DIR}/morphuntion_data)
set(MORPHUNTION_DATA_ROOT ${MORPHUNTION_DATA_ROOT_PREFIX}${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_DATADIR})

# Create both staging directories at configure time.
file(MAKE_DIRECTORY "${MORPHUNTION_DATA_ROOT}")
file(MAKE_DIRECTORY "${MORPHUNTION_INCLUDE_ROOT}")

# Expose the system libxml2 as a linkable interface target named "xml2".
# CMAKE_OSX_SYSROOT is empty on non-Apple platforms, which makes the include
# path fall back to /usr/include/libxml2.
add_library(xml2 INTERFACE IMPORTED GLOBAL)
set_target_properties(xml2 PROPERTIES IMPORTED_LIBNAME xml2)
target_include_directories(xml2 INTERFACE ${CMAKE_OSX_SYSROOT}/usr/include/libxml2)

include(dependICU)

# Runs Morphuntion unit tests: "make check"
# Library search path made of the ICU library directory and the directory of
# the built morphuntion library (resolved at generate time).
set(DYLD_LIBRARY_PATH ${ICU_LIB_DIRECTORY}:$<TARGET_FILE_DIR:morphuntion>)
# Compare the variable by name with STREQUAL: an unquoted ${CMAKE_SYSTEM_NAME}
# would be dereferenced a second time if a variable with that value existed,
# and MATCHES performs a regex substring match rather than an exact comparison.
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
  # On Linux the CoreFoundation link directories are appended as well.
  set(DYLD_LIBRARY_PATH ${DYLD_LIBRARY_PATH}:$<TARGET_PROPERTY:CoreFoundation,INTERFACE_LINK_DIRECTORIES>)
endif()
|
||
# Sub-projects. Those marked EXCLUDE_FROM_ALL are only built when one of their
# targets is requested explicitly or is needed by another target.
add_subdirectory(ext EXCLUDE_FROM_ALL)
add_subdirectory(tools EXCLUDE_FROM_ALL)
add_subdirectory(resources)
add_subdirectory(src)
add_subdirectory(test EXCLUDE_FROM_ALL)

# "make dist": runs the generated install script with DESTDIR pointing at
# <build>/dist, producing a staged install tree inside the build directory.
# `cmake -E env` sets DESTDIR without going through `sh -c`, so build paths
# containing spaces or shell metacharacters are passed through intact and no
# POSIX shell is required.
add_custom_target(dist
  COMMAND ${CMAKE_COMMAND} -E env "DESTDIR=${CMAKE_CURRENT_BINARY_DIR}/dist"
          ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_install.cmake"
  WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
  VERBATIM
)
# The install script copies the library, headers and data, so all three must
# be built before it runs.
add_dependencies(dist morphuntion morphuntion-headers morphuntion-data)

add_subdirectory(docs EXCLUDE_FROM_ALL)
|
||
# make list-commands
# Prints a short help text describing the main build targets.
# Each line is printed with its own `cmake -E echo` instead of a single shell
# `echo` containing "\n" escapes: whether `echo` expands "\n" depends on the
# shell behind /bin/sh, so the escapes can show up literally in the output.
add_custom_target(list-commands
  COMMAND ${CMAKE_COMMAND} -E echo "make list-commands : Shows this message"
  COMMAND ${CMAKE_COMMAND} -E echo ""
  COMMAND ${CMAKE_COMMAND} -E echo "make check : Runs unit tests"
  COMMAND ${CMAKE_COMMAND} -E echo "make check-headers : Tests whether all exported headers can be compiled independently."
  COMMAND ${CMAKE_COMMAND} -E echo "make morphuntion : Builds the shared library."
  COMMAND ${CMAKE_COMMAND} -E echo "make morphuntion-headers : Copy all morphuntion public headers to <build>/morphuntion_headers."
  COMMAND ${CMAKE_COMMAND} -E echo "make morphuntion-data : Generate all morphuntion data under <build>/morphuntion_data."
  COMMAND ${CMAKE_COMMAND} -E echo "make dist : Builds morphuntion, the headers, and the data."
  COMMAND ${CMAKE_COMMAND} -E echo "make coverage : Generates code coverage using sonar-scanner"
  COMMAND ${CMAKE_COMMAND} -E echo "make generate-coverage-csv : Generates code coverage as a csv"
  COMMAND ${CMAKE_COMMAND} -E echo ""
  VERBATIM
)
# end section
|
||
# Install rules. Each artifact kind is placed in its own install component:
# the shared library, the staged public headers, and the staged data.
# The trailing "/" on the directory arguments installs the directory's
# contents rather than the directory itself.
install(
  TARGETS morphuntion
  LIBRARY
    COMPONENT morphuntion_library
)
install(
  DIRECTORY ${MORPHUNTION_INCLUDE_ROOT}/
  TYPE INCLUDE
  COMPONENT morphuntion_headers
)
install(
  DIRECTORY ${MORPHUNTION_DATA_ROOT}/
  TYPE DATA
  COMPONENT morphuntion_data
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
COPYRIGHT AND PERMISSION NOTICE | ||
|
||
Copyright 2016-2024 Apple Inc. All rights reserved. | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining | ||
a copy of this software and associated documentation files (the | ||
"Software"), to deal in the Software without restriction, including | ||
without limitation the rights to use, copy, modify, merge, publish, | ||
distribute, sublicense, and/or sell copies of the Software, and to | ||
permit persons to whom the Software is furnished to do so, subject to | ||
the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be | ||
included in all copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | ||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | ||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
|
||
SPDX-License-Identifier: MIT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
<!-- | ||
Copyright 2016-2024 Apple Inc. All rights reserved. | ||
--> | ||
# Morphuntion | ||
nciric marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
## About Morphuntion | ||
|
||
Morphuntion is a C/C++ library that provides support for the following tasks. | ||
|
||
- Inflection of a word into another [surface form](https://en.wikipedia.org/wiki/Surface_form) of that word. | ||
- Grammatical agreement between words. | ||
- Querying grammatical properties as [grammemes](https://en.wiktionary.org/wiki/grammeme) (the values of [grammatical categories](https://en.wikipedia.org/wiki/Grammatical_category)) | ||
|
||
It uses C++20, [ICU4C](https://icu.unicode.org/), UTF-16 strings (just like Java) and a data source of lexical | ||
dictionaries that contain relationships between inflections of a word. Just like ICU, it is thread safe between service | ||
objects, but mutable objects are not necessarily thread safe between threads. | ||
|
||
By making this implementation open sourced, various software frameworks can generate grammatically correct messages | ||
nciric marked this conversation as resolved.
Show resolved
Hide resolved
|
||
and lower the barriers to correctly localizing software. | ||
|
||
### Platforms | ||
Morphuntion is currently supported on these operating systems: | ||
|
||
* iOS | ||
* iPadOS | ||
* macOS | ||
* tvOS | ||
* watchOS | ||
* visionOS | ||
* UBI Linux 9 | ||
* Ubuntu Linux 22 | ||
|
||
## How Morphuntion works | ||
The following sections delve a bit deeper into the low-level functionality of Morphuntion, such as how caching and | ||
multi-threading work with Morphuntion. These sections are meant as a guide to utilizing Morphuntion in a | ||
safe manner while also getting as much out of the library as possible. | ||
|
||
### Caching | ||
At the time of writing, caching is a one-way street. Once an object that utilizes caching functionality has been loaded with some data, | ||
it remains in memory until the process has terminated. Reloading of such caches is not supported, since that involves | ||
ensuring that all dependencies in the process space sharing the same resources have also stopped and released the same | ||
resources. | ||
|
||
The caching being done by Morphuntion lowers the lookup time for many portions of the | ||
`morphuntion::dialog::CommonConceptFactory` | ||
operations. It is for this reason that it may be a good idea to initialize these constructs before lookup time, so | ||
that Morphuntion is in a "warmed up" state. | ||
|
||
It is important to note that many of these cached data structures have ties to specific references in Morphuntion's | ||
memory-mapped dictionaries. This makes reloading dictionaries difficult. | ||
|
||
#### Grammar synthesizer caching | ||
Grammar synthesizers memory map lexical dictionaries and cache various grammatical structures depending on the language. | ||
Synthesized words are not cached. | ||
|
||
### Multi-threading | ||
Morphuntion is multi-thread friendly. It has <code>std::mutex</code> in places where deadlocks could occur, and | ||
generally tries to abstract this away from users. | ||
|
||
## History | ||
|
||
This project was donated to the Unicode Consortium from Siri at Apple Inc. These additional | ||
resources may be helpful background information to reference: | ||
|
||
* [Automatic Grammar Agreement in Message Formatting](https://www.youtube.com/watch?v=C2e7hYIkqoM) ([2023.11.8](https://www.unicode.org/events/utw/2023/talks/grammar/)) | ||
* [Authoring Grammatically Correct Conversational Templates for Siri](https://www.youtube.com/watch?v=emlIWUTaJFM) ([2020.10.16](https://www.unicodeconference.org/iuc44/Conference_Program.pdf)) | ||
* [Let's Come To An Agreement About Our Words](https://www.youtube.com/watch?v=KclVxxHX26k) ([2017.02.16](https://www.imug.org/events/imug-2017-events.htm#words)) | ||
|
||
## Dependencies | ||
|
||
The following are the dependencies to use this code. | ||
|
||
| Library | runtime | build time | test time | Note | | ||
|---------------------------------------------------|:-------:|:----------:|:---------:|--------------------------| | ||
| [CoreFoundation](https://www.swift.org/) | ✅ | ✅ | ✅ | automatically downloaded | | ||
| [ICU4C](https://icu.unicode.org/) | ✅ | ✅ | ✅ | | | ||
| [marisa](https://github.com/s-yata/marisa-trie) | ✅ | ✅ | ✅ | statically linked | | ||
| [cmake](https://cmake.org/) | | ✅ | | | | ||
| [libxml2](https://gitlab.gnome.org/GNOME/libxml2) | | ✅ | ✅ | | | ||
| [Catch2](https://github.com/catchorg/Catch2/) | | | ✅ | automatically downloaded | | ||
|
||
### Building | ||
|
||
Before building this project, you must have a distribution of ICU4C available. The path to the ICU distribution must be | ||
set as ICU_ROOT in either options.mk or as a command line argument to cmake. The path should be the same as the | ||
--prefix value used when ICU was configured, built and installed. | ||
|
||
If you want to build this project faster, you can adjust the number of concurrent build jobs used when compiling. | ||
|
||
``` | ||
cd morphuntion | ||
mkdir build | ||
cd build | ||
CC=clang CXX=clang++ cmake -DICU_ROOT=<PATH_TO_ICU> .. | ||
make -j 8 check | ||
``` | ||
|
||
Optionally, ICU_ROOT can be specified in the file options.mk with the following type of syntax. | ||
``` | ||
ICU_ROOT=<PATH_TO_ICU> | ||
``` |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we keep the Apple copyright message? Mostly a question for Anne and George.