diff --git a/single_include/upa/.gitignore b/single_include/upa/.gitignore new file mode 100644 index 0000000..faf61e6 --- /dev/null +++ b/single_include/upa/.gitignore @@ -0,0 +1,2 @@ +*.cpp +*.h diff --git a/tools/amalgamate.bat b/tools/amalgamate.bat new file mode 100644 index 0000000..ad02f26 --- /dev/null +++ b/tools/amalgamate.bat @@ -0,0 +1,12 @@ +@echo off + +REM the directory path of this file +set p=%~dp0 + +REM CD to repository root folder +%~d0 +cd %p%.. + +REM Amalgamate +python tools/amalgamate/amalgamate.py -c tools/amalgamate/config-cpp.json -s . -p tools/amalgamate/config-cpp.prologue +python tools/amalgamate/amalgamate.py -c tools/amalgamate/config-h.json -s . diff --git a/tools/amalgamate.sh b/tools/amalgamate.sh new file mode 100755 index 0000000..b0a1b59 --- /dev/null +++ b/tools/amalgamate.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +# the directory path of this file +# https://stackoverflow.com/q/59895 +p="$(dirname "$0")" + +# CD to repository root folder +cd $p/.. + +# Amalgamate +python3 tools/amalgamate/amalgamate.py -c tools/amalgamate/config-cpp.json -s . -p tools/amalgamate/config-cpp.prologue +python3 tools/amalgamate/amalgamate.py -c tools/amalgamate/config-h.json -s . diff --git a/tools/amalgamate/LICENSE.md b/tools/amalgamate/LICENSE.md new file mode 100644 index 0000000..7fe9cf0 --- /dev/null +++ b/tools/amalgamate/LICENSE.md @@ -0,0 +1,27 @@ +amalgamate.py - Amalgamate C source and header files +Copyright (c) 2012, Erik Edlund + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of Erik Edlund, nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/tools/amalgamate/README.md b/tools/amalgamate/README.md new file mode 100644 index 0000000..975ca0b --- /dev/null +++ b/tools/amalgamate/README.md @@ -0,0 +1,66 @@ + +# amalgamate.py - Amalgamate C source and header files + +Origin: https://bitbucket.org/erikedlund/amalgamate + +Mirror: https://github.com/edlund/amalgamate + +`amalgamate.py` aims to make it easy to use SQLite-style C source and header +amalgamation in projects. + +For more information, please refer to: http://sqlite.org/amalgamation.html + +## Here be dragons + +`amalgamate.py` is quite dumb, it only knows the bare minimum about C code +required in order to be able to handle trivial include directives. It can +produce weird results for unexpected code. + +Things to be aware of: + +`amalgamate.py` will not handle complex include directives correctly: + + #define HEADER_PATH "path/to/header.h" + #include HEADER_PATH + +In the above example, `path/to/header.h` will not be included in the +amalgamation (HEADER_PATH is never expanded). + +`amalgamate.py` makes the assumption that each source and header file which +is not empty will end in a new-line character, which is not immediately +preceded by a backslash character (see 5.1.1.2p1.2 of ISO C99). + +`amalgamate.py` should be usable with C++ code, but raw string literals from +C++11 will definitely cause problems: + + R"delimiter(Terrible raw \ data " #include )delimiter" + R"delimiter(Terrible raw \ data " escaping)delimiter" + +In the examples above, `amalgamate.py` will stop parsing the raw string literal +when it encounters the first quotation mark, which will produce unexpected +results. + +## Installing amalgamate.py + +Python v.2.7.0 or higher is required. + +`amalgamate.py` can be tested and installed using the following commands: + + ./test.sh && sudo -k cp ./amalgamate.py /usr/local/bin/ + +## Using amalgamate.py + + amalgamate.py [-v] -c path/to/config.json -s path/to/source/dir \ + [-p path/to/prologue.(c|h)] + + * The `-c, --config` option should specify the path to a JSON config file which + lists the source files, include paths and where to write the resulting + amalgamation. Have a look at `test/source.c.json` and `test/include.h.json` + to see two examples. + + * The `-s, --source` option should specify the path to the source directory. + This is useful for supporting separate source and build directories. + + * The `-p, --prologue` option should specify the path to a file which will be + added to the beginning of the amalgamation. It is optional. + diff --git a/tools/amalgamate/amalgamate.py b/tools/amalgamate/amalgamate.py new file mode 100755 index 0000000..7a7a3ed --- /dev/null +++ b/tools/amalgamate/amalgamate.py @@ -0,0 +1,301 @@ +#!/usr/bin/env python3 + +# amalgamate.py - Amalgamate C source and header files. +# Copyright (c) 2012, Erik Edlund +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Erik Edlund, nor the names of its contributors may +# be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import datetime +import json +import os +import re +import io + +class Amalgamation(object): + + # Prepends self.source_path to file_path if needed. + def actual_path(self, file_path): + if not os.path.isabs(file_path): + file_path = os.path.join(self.source_path, file_path) + return file_path + + # Search included file_path in self.include_paths and + # in source_dir if specified. + def find_included_file(self, file_path, source_dir): + search_dirs = self.include_paths[:] + if source_dir: + search_dirs.insert(0, source_dir) + + for search_dir in search_dirs: + search_path = os.path.join(search_dir, file_path) + if os.path.isfile(self.actual_path(search_path)): + return search_path + return None + + def __init__(self, args): + with open(args.config, 'r') as f: + self.head = [] # default + self.ingnore_includes = False + config = json.loads(f.read()) + for key in config: + setattr(self, key, config[key]) + + self.verbose = args.verbose == "yes" + self.prologue = args.prologue + self.source_path = args.source_path + self.included_files = [] + + # Generate the amalgamation and write it to the target file. + def generate(self): + amalgamation = "" + + if self.prologue: + with io.open(self.prologue, mode="r", encoding="utf-8") as f: + amalgamation += datetime.datetime.now().strftime(f.read()) + + if self.verbose: + print("Config:") + print(" target = {0}".format(self.target)) + print(" working_dir = {0}".format(os.getcwd())) + print(" include_paths = {0}".format(self.include_paths)) + print("Creating amalgamation:") + for file_path in self.head: + actual_path = self.actual_path(file_path) + with io.open(actual_path, mode="r", encoding="utf-8") as f: + amalgamation += f.read() + for file_path in self.sources: + # Do not check the include paths while processing the source + # list, all given source paths must be correct. + # actual_path = self.actual_path(file_path) + print(" - processing \"{0}\"".format(file_path)) + t = TranslationUnit(file_path, self, True) + amalgamation += t.content + + with io.open(self.target, mode="w", encoding="utf-8") as f: + f.write(amalgamation) + + print("...done!\n") + if self.verbose: + print("Files processed: {0}".format(self.sources)) + print("Files included: {0}".format(self.included_files)) + print("") + +class TranslationUnit(object): + + # // C++ comment. + cpp_comment_pattern = re.compile(r"//.*?\n") + + # /* C comment. */ + c_comment_pattern = re.compile(r"/\*.*?\*/", re.S) + + # "complex \"stri\\\ng\" value". + string_pattern = re.compile("[^']" r'".*?(?<=[^\\])"', re.S) + + # Handle simple include directives. Support for advanced + # directives where macros and defines needs to expanded is + # not a concern right now. + include_pattern = re.compile( + r'#\s*include\s+(<|")(?P.*?)("|>)', re.S) + + # #pragma once + pragma_once_pattern = re.compile(r'#\s*pragma\s+once', re.S) + + # Search for pattern in self.content, add the match to + # contexts if found and update the index accordingly. + def _search_content(self, index, pattern, contexts): + match = pattern.search(self.content, index) + if match: + contexts.append(match) + return match.end() + return index + 2 + + # Return all the skippable contexts, i.e., comments and strings + def _find_skippable_contexts(self): + # Find contexts in the content in which a found include + # directive should not be processed. + skippable_contexts = [] + + # Walk through the content char by char, and try to grab + # skippable contexts using regular expressions when found. + i = 1 + content_len = len(self.content) + while i < content_len: + j = i - 1 + current = self.content[i] + previous = self.content[j] + + if current == '"': + # String value. + i = self._search_content(j, self.string_pattern, + skippable_contexts) + elif current == '*' and previous == '/': + # C style comment. + i = self._search_content(j, self.c_comment_pattern, + skippable_contexts) + elif current == '/' and previous == '/': + # C++ style comment. + i = self._search_content(j, self.cpp_comment_pattern, + skippable_contexts) + else: + # Skip to the next char. + i += 1 + + return skippable_contexts + + # Returns True if the match is within list of other matches + def _is_within(self, match, matches): + for m in matches: + if match.start() > m.start() and \ + match.end() < m.end(): + return True + return False + + # Removes pragma once from content + def _process_pragma_once(self): + content_len = len(self.content) + if content_len < len("#include "): + return 0 + + # Find contexts in the content in which a found include + # directive should not be processed. + skippable_contexts = self._find_skippable_contexts() + + pragmas = [] + pragma_once_match = self.pragma_once_pattern.search(self.content) + while pragma_once_match: + if not self._is_within(pragma_once_match, skippable_contexts): + pragmas.append(pragma_once_match) + + pragma_once_match = self.pragma_once_pattern.search(self.content, + pragma_once_match.end()) + + # Handle all collected pragma once directives. + prev_end = 0 + tmp_content = '' + for pragma_match in pragmas: + tmp_content += self.content[prev_end:pragma_match.start()] + prev_end = pragma_match.end() + tmp_content += self.content[prev_end:] + self.content = tmp_content + + # Include all trivial #include directives into self.content. + def _process_includes(self): + content_len = len(self.content) + if content_len < len("#include "): + return 0 + + # Find contexts in the content in which a found include + # directive should not be processed. + skippable_contexts = self._find_skippable_contexts() + + # Search for include directives in the content, collect those + # which should be included into the content. + includes = [] + include_match = self.include_pattern.search(self.content) + while include_match: + if not self._is_within(include_match, skippable_contexts): + include_path = include_match.group("path") + search_same_dir = include_match.group(1) == '"' + found_included_path = self.amalgamation.find_included_file( + include_path, self.file_dir if search_same_dir else None) + if found_included_path: + includes.append((include_match, found_included_path)) + + include_match = self.include_pattern.search(self.content, + include_match.end()) + + # Handle all collected include directives. + prev_end = 0 + tmp_content = '' + for include in includes: + include_match, found_included_path = include + tmp_content += self.content[prev_end:include_match.start()] + tmp_content += "// {0}\n".format(include_match.group(0)) + if not self.amalgamation.ingnore_includes and not found_included_path in self.amalgamation.included_files: + t = TranslationUnit(found_included_path, self.amalgamation, False) + tmp_content += t.content + prev_end = include_match.end() + tmp_content += self.content[prev_end:] + self.content = tmp_content + + return len(includes) + + # Make all content processing + def _process(self): + if not self.is_root: + self._process_pragma_once() + self._process_includes() + + def __init__(self, file_path, amalgamation, is_root): + self.file_path = file_path + self.file_dir = os.path.dirname(file_path) + self.amalgamation = amalgamation + self.is_root = is_root + + self.amalgamation.included_files.append(self.file_path) + + actual_path = self.amalgamation.actual_path(file_path) + if not os.path.isfile(actual_path): + raise IOError("File not found: \"{0}\"".format(file_path)) + with io.open(actual_path, mode="r", encoding="utf-8") as f: + self.content = f.read() + self._process() + +def main(): + description = "Amalgamate C source and header files." + usage = " ".join([ + "amalgamate.py", + "[-v]", + "-c path/to/config.json", + "-s path/to/source/dir", + "[-p path/to/prologue.(c|h)]" + ]) + argsparser = argparse.ArgumentParser( + description=description, usage=usage) + + argsparser.add_argument("-v", "--verbose", dest="verbose", + choices=["yes", "no"], metavar="", help="be verbose") + + argsparser.add_argument("-c", "--config", dest="config", + required=True, metavar="", help="path to a JSON config file") + + argsparser.add_argument("-s", "--source", dest="source_path", + required=True, metavar="", help="source code path") + + argsparser.add_argument("-p", "--prologue", dest="prologue", + required=False, metavar="", help="path to a C prologue file") + + amalgamation = Amalgamation(argsparser.parse_args()) + amalgamation.generate() + +if __name__ == "__main__": + main() + diff --git a/tools/amalgamate/config-cpp.json b/tools/amalgamate/config-cpp.json new file mode 100644 index 0000000..e4ef124 --- /dev/null +++ b/tools/amalgamate/config-cpp.json @@ -0,0 +1,16 @@ +{ + "project": "upa/url.cpp", + "target": "single_include/upa/url.cpp", + "sources": [ + "src/url.cpp", + "src/url_idna.cpp", + "src/url_ip.cpp", + "src/url_percent_encode.cpp", + "src/url_search_params.cpp", + "src/url_utf.cpp" + ], + "include_paths": [ + "include" + ], + "ingnore_includes": true +} diff --git a/tools/amalgamate/config-cpp.prologue b/tools/amalgamate/config-cpp.prologue new file mode 100644 index 0000000..8381f03 --- /dev/null +++ b/tools/amalgamate/config-cpp.prologue @@ -0,0 +1,7 @@ +// Copyright 2016-2023 Rimas Misevičius +// Distributed under the BSD-style license that can be +// found in the LICENSE file. +// + +#include "url.h" + diff --git a/tools/amalgamate/config-h.json b/tools/amalgamate/config-h.json new file mode 100644 index 0000000..97229fd --- /dev/null +++ b/tools/amalgamate/config-h.json @@ -0,0 +1,10 @@ +{ + "project": "upa/url.h", + "target": "single_include/upa/url.h", + "sources": [ + "include/upa/url.h" + ], + "include_paths": [ + "include/upa" + ] +}