Hello libcode-unicode
This commit is contained in:
commit
cbad731152
17
.editorconfig
Normal file
17
.editorconfig
Normal file
@ -0,0 +1,17 @@
|
||||
root = true
|
||||
|
||||
[*]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
end_of_line = lf
|
||||
charset = utf-8
|
||||
trim_trailing_whitespace = true
|
||||
insert_final_newline = true
|
||||
|
||||
[*.md]
|
||||
indent_size = 4
|
||||
max_line_length = off
|
||||
trim_trailing_whitespace = false
|
||||
|
||||
[*.yaml]
|
||||
indent_size = 2
|
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
@ -0,0 +1 @@
|
||||
* text=auto
|
24
.gitea/workflows/on-push.yaml
Normal file
24
.gitea/workflows/on-push.yaml
Normal file
@ -0,0 +1,24 @@
|
||||
name: on-push
|
||||
on: [push]
|
||||
|
||||
jobs:
|
||||
build-and-test:
|
||||
runs-on: linux
|
||||
container: code.helloryan.se/infra/buildenv/cxx-amd64-fedora-40:latest
|
||||
volumes:
|
||||
- /build
|
||||
steps:
|
||||
- name: Clone repository
|
||||
uses: actions/checkout@v3
|
||||
- name: Authenticate
|
||||
run: |
|
||||
git config unset http.https://code.helloryan.se/.extraheader
|
||||
echo "${{ secrets.NETRC }}" >> ~/.netrc
|
||||
- name: Initialize
|
||||
run: |
|
||||
bpkg create -d /build cc config.cc.coptions="-Wall -Werror"
|
||||
bdep init -A /build
|
||||
- name: Build
|
||||
run: b
|
||||
- name: Test
|
||||
run: b test
|
31
.gitignore
vendored
Normal file
31
.gitignore
vendored
Normal file
@ -0,0 +1,31 @@
|
||||
.bdep/
|
||||
|
||||
# Local default options files.
|
||||
#
|
||||
.build2/local/
|
||||
|
||||
# Compiler/linker output.
|
||||
#
|
||||
*.d
|
||||
*.t
|
||||
*.i
|
||||
*.i.*
|
||||
*.ii
|
||||
*.ii.*
|
||||
*.o
|
||||
*.obj
|
||||
*.gcm
|
||||
*.pcm
|
||||
*.ifc
|
||||
*.so
|
||||
*.dylib
|
||||
*.dll
|
||||
*.a
|
||||
*.lib
|
||||
*.exp
|
||||
*.pdb
|
||||
*.ilk
|
||||
*.exe
|
||||
*.exe.dlls/
|
||||
*.exe.manifest
|
||||
*.pc
|
31
LICENSE
Normal file
31
LICENSE
Normal file
@ -0,0 +1,31 @@
|
||||
Copyright © 2024 Ryan. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. All advertising materials mentioning features or use of this software must
|
||||
display the following acknowledgement:
|
||||
|
||||
This product includes software developed by Ryan, http://helloryan.se/.
|
||||
|
||||
4. Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
|
||||
NO EVENT SHALL COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
21
README.md
Normal file
21
README.md
Normal file
@ -0,0 +1,21 @@
|
||||
# libcode-unicode
|
||||
|
||||
![Build status](https://code.helloryan.se/code/libcode-unicode/actions/workflows/on-push.yaml/badge.svg)
|
||||
|
||||
## Requirements
|
||||
|
||||
None, other than a modern C++ compiler.
|
||||
|
||||
## Building
|
||||
|
||||
See the wiki, https://code.helloryan.se/code/wiki/wiki/Build-Instructions, for
|
||||
build instructions.
|
||||
|
||||
## Contact
|
||||
|
||||
Please report bugs and issues by sending an e-mail to: ryan@helloryan.se.
|
||||
|
||||
## Contributing
|
||||
|
||||
Please send an e-mail to ryan@helloryan.se to request an account and
|
||||
write-access to the libcode-unicode repository.
|
4
build/.gitignore
vendored
Normal file
4
build/.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
/config.build
|
||||
/root/
|
||||
/bootstrap/
|
||||
build/
|
7
build/bootstrap.build
Normal file
7
build/bootstrap.build
Normal file
@ -0,0 +1,7 @@
|
||||
project = libcode-unicode
|
||||
|
||||
using version
|
||||
using config
|
||||
using test
|
||||
using install
|
||||
using dist
|
6
build/export.build
Normal file
6
build/export.build
Normal file
@ -0,0 +1,6 @@
|
||||
$out_root/
|
||||
{
|
||||
include code/unicode/
|
||||
}
|
||||
|
||||
export $out_root/code/unicode/$import.target
|
16
build/root.build
Normal file
16
build/root.build
Normal file
@ -0,0 +1,16 @@
|
||||
# Uncomment to suppress warnings coming from external libraries.
|
||||
#
|
||||
#cxx.internal.scope = current
|
||||
|
||||
cxx.std = latest
|
||||
|
||||
using cxx
|
||||
|
||||
hxx{*}: extension = hxx
|
||||
ixx{*}: extension = ixx
|
||||
txx{*}: extension = txx
|
||||
cxx{*}: extension = cxx
|
||||
|
||||
# The test target for cross-testing (running tests under Wine, etc).
|
||||
#
|
||||
test.target = $cxx.target
|
5
buildfile
Normal file
5
buildfile
Normal file
@ -0,0 +1,5 @@
|
||||
./: {code/ tests/} doc{README.md} legal{LICENSE} manifest
|
||||
|
||||
# Don't install tests.
|
||||
#
|
||||
tests/: install = false
|
9
code/unicode/.gitignore
vendored
Normal file
9
code/unicode/.gitignore
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
# Generated version header.
|
||||
#
|
||||
version.hxx
|
||||
|
||||
# Unit test executables and Testscript output directories
|
||||
# (can be symlinks).
|
||||
#
|
||||
*.test
|
||||
test-*.test
|
66
code/unicode/buildfile
Normal file
66
code/unicode/buildfile
Normal file
@ -0,0 +1,66 @@
|
||||
intf_libs = # Interface dependencies.
|
||||
impl_libs = # Implementation dependencies.
|
||||
|
||||
./: lib{code-unicode}: libul{code-unicode}
|
||||
|
||||
libul{code-unicode}: {hxx ixx txx cxx}{** -**.test... -version} \
|
||||
{hxx }{ version}
|
||||
|
||||
libul{code-unicode}: $impl_libs $intf_libs
|
||||
|
||||
# Unit tests.
|
||||
#
|
||||
exe{*.test}:
|
||||
{
|
||||
test = true
|
||||
install = false
|
||||
}
|
||||
|
||||
test_libs =
|
||||
import test_libs =+ libcode-validation%lib{code-validation}
|
||||
|
||||
for t: cxx{**.test...}
|
||||
{
|
||||
d = $directory($t)
|
||||
n = $name($t)...
|
||||
|
||||
./: $d/exe{$n}: $t $d/{hxx ixx txx}{+$n} $d/testscript{+$n} $test_libs
|
||||
$d/exe{$n}: libul{code-unicode}: bin.whole = false
|
||||
$d/exe{$n}: test.arguments = -v -v
|
||||
}
|
||||
|
||||
hxx{version}: in{version} $src_root/manifest
|
||||
{
|
||||
dist = true
|
||||
clean = ($src_root != $out_root)
|
||||
}
|
||||
|
||||
# Build options.
|
||||
#
|
||||
cxx.poptions =+ "-I$out_root" "-I$src_root"
|
||||
|
||||
# Export options.
|
||||
#
|
||||
lib{code-unicode}:
|
||||
{
|
||||
cxx.export.poptions = "-I$out_root" "-I$src_root"
|
||||
cxx.export.libs = $intf_libs
|
||||
}
|
||||
|
||||
# For pre-releases use the complete version to make sure they cannot
|
||||
# be used in place of another pre-release or the final version. See
|
||||
# the version module for details on the version.* variable values.
|
||||
#
|
||||
if $version.pre_release
|
||||
lib{code-unicode}: bin.lib.version = "-$version.project_id"
|
||||
else
|
||||
lib{code-unicode}: bin.lib.version = "-$version.major.$version.minor"
|
||||
|
||||
# Install into the code/unicode/ subdirectory of, say, /usr/include/
|
||||
# recreating subdirectories.
|
||||
#
|
||||
{hxx ixx txx}{*}:
|
||||
{
|
||||
install = include/code/unicode/
|
||||
install.subdirs = true
|
||||
}
|
50
code/unicode/decoding.hxx
Normal file
50
code/unicode/decoding.hxx
Normal file
@ -0,0 +1,50 @@
|
||||
#ifndef code__unicode__decoding_hxx_
|
||||
#define code__unicode__decoding_hxx_
|
||||
|
||||
#include <code/unicode/unicode.hxx>
|
||||
|
||||
#include <cstdint>
|
||||
#include <istream>
|
||||
#include <iterator>
|
||||
|
||||
namespace code::unicode
|
||||
{
|
||||
|
||||
/// Abstract interface for objects that pull one code point out of a
/// byte stream.
class Decoder
{
public:
  /// Virtual so that implementations can be destroyed through a
  /// `Decoder` pointer or reference.
  virtual ~Decoder() = default;

  /// Extract a single code point from the stream `i`.
  ///
  /// @param i  Stream to read from.
  /// @return   The extracted code point as a 32-bit value.
  virtual auto
  extract(std::istream& i) const -> std::uint32_t = 0;
};
|
||||
|
||||
class Utf8_decoder
|
||||
: public Decoder
|
||||
{
|
||||
public:
|
||||
std::uint32_t
|
||||
extract(std::istream& i) const override;
|
||||
|
||||
template< typename InputIterator >
|
||||
std::uint32_t
|
||||
decode(InputIterator& it, InputIterator const& end) const;
|
||||
|
||||
template< typename InputIterator >
|
||||
std::uint32_t
|
||||
decode(InputIterator&& it, InputIterator const& end) const
|
||||
{
|
||||
return decode(it, end);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} // namespace code::unicode
|
||||
|
||||
#include <code/unicode/decoding.ixx>
|
||||
#include <code/unicode/decoding.txx>
|
||||
|
||||
#endif
|
13
code/unicode/decoding.ixx
Normal file
13
code/unicode/decoding.ixx
Normal file
@ -0,0 +1,13 @@
|
||||
namespace code::unicode
|
||||
{
|
||||
|
||||
inline
|
||||
std::uint32_t
|
||||
Utf8_decoder::
|
||||
extract(std::istream& i) const
|
||||
{
|
||||
return decode(std::istreambuf_iterator<char>{ i },
|
||||
std::istreambuf_iterator<char>{});
|
||||
}
|
||||
|
||||
} // namespace code::unicode
|
67
code/unicode/decoding.test.cxx
Normal file
67
code/unicode/decoding.test.cxx
Normal file
@ -0,0 +1,67 @@
|
||||
#include <code/unicode/decoding.hxx>
|
||||
|
||||
#include <code/validation/main.hxx>
|
||||
|
||||
#include <string>
|
||||
|
||||
VALIDATION_TEST(test_1)
|
||||
{
|
||||
std::string const encoded_0{ "\x00", 1 };
|
||||
std::string const encoded_1{ "\x7f", 1 };
|
||||
|
||||
code::unicode::Utf8_decoder utf8;
|
||||
|
||||
auto decoded_0 = utf8.decode(encoded_0.begin(), encoded_0.end());
|
||||
auto decoded_1 = utf8.decode(encoded_1.begin(), encoded_1.end());
|
||||
|
||||
VALIDATION_ASSERT_EQUAL(decoded_0, 0U);
|
||||
VALIDATION_ASSERT_EQUAL(decoded_1, 0x7fU);
|
||||
}
|
||||
|
||||
VALIDATION_TEST(test_2)
|
||||
{
|
||||
std::string const encoded_0{ "\xc2\x80" };
|
||||
std::string const encoded_1{ "\xdf\xbf" };
|
||||
|
||||
code::unicode::Utf8_decoder utf8;
|
||||
|
||||
auto decoded_0 = utf8.decode(encoded_0.begin(), encoded_0.end());
|
||||
auto decoded_1 = utf8.decode(encoded_1.begin(), encoded_1.end());
|
||||
|
||||
VALIDATION_ASSERT_EQUAL(decoded_0, 0x80U);
|
||||
VALIDATION_ASSERT_EQUAL(decoded_1, 0x7ffU);
|
||||
}
|
||||
|
||||
VALIDATION_TEST(test_3)
|
||||
{
|
||||
std::string const encoded_0{ "\xe0\xa0\x80" };
|
||||
std::string const encoded_1{ "\xef\xbf\xbf" };
|
||||
|
||||
code::unicode::Utf8_decoder utf8;
|
||||
|
||||
auto decoded_0 = utf8.decode(encoded_0.begin(), encoded_0.end());
|
||||
auto decoded_1 = utf8.decode(encoded_1.begin(), encoded_1.end());
|
||||
|
||||
VALIDATION_ASSERT_EQUAL(decoded_0, 0x800U);
|
||||
VALIDATION_ASSERT_EQUAL(decoded_1, 0xffffU);
|
||||
}
|
||||
|
||||
VALIDATION_TEST(test_4)
|
||||
{
|
||||
std::string const encoded_0{ "\xf0\x90\x80\x80" };
|
||||
std::string const encoded_1{ "\xf4\x8f\xbf\xbf" };
|
||||
|
||||
code::unicode::Utf8_decoder utf8;
|
||||
|
||||
auto decoded_0 = utf8.decode(encoded_0.begin(), encoded_0.end());
|
||||
auto decoded_1 = utf8.decode(encoded_1.begin(), encoded_1.end());
|
||||
|
||||
VALIDATION_ASSERT_EQUAL(decoded_0, 0x10000U);
|
||||
VALIDATION_ASSERT_EQUAL(decoded_1, 0x10ffffU);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char* argv[])
|
||||
{
|
||||
return code::validation::main(argc, argv);
|
||||
}
|
70
code/unicode/decoding.txx
Normal file
70
code/unicode/decoding.txx
Normal file
@ -0,0 +1,70 @@
|
||||
namespace code::unicode {
|
||||
|
||||
template<typename InputIterator>
|
||||
std::uint32_t
|
||||
Utf8_decoder::
|
||||
decode(InputIterator& it, InputIterator const& end) const
|
||||
{
|
||||
if (it == end)
|
||||
return replacement_character;
|
||||
|
||||
std::uint32_t c1 = (unsigned char)*it++;
|
||||
|
||||
// 1 byte (valid: 0 <= codepoint <= 0x7f)
|
||||
if (c1 <= 0x7f)
|
||||
return c1;
|
||||
|
||||
if (it == end)
|
||||
return replacement_character;
|
||||
|
||||
std::uint32_t c2 = (unsigned char)*it++;
|
||||
|
||||
// 2 bytes (valid: 0x80 <= codepoint <= 0x7ff)
|
||||
if (c1 <= 0b11011111) {
|
||||
std::uint32_t c = ((c1 & 0b00011111) << 6) | (c2 & 0b00111111);
|
||||
|
||||
if (c < 0x80) // overlong protection
|
||||
return replacement_character;
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
if (it == end)
|
||||
return replacement_character;
|
||||
|
||||
std::uint32_t c3 = (unsigned char)*it++;
|
||||
|
||||
// 3 bytes (valid: 0x800 <= codepoint <= 0xffff)
|
||||
if (c1 <= 0b11101111) {
|
||||
std::uint32_t c =
|
||||
((c1 & 0b00001111) << 12) | ((c2 & 0b00111111) << 6) | (c3 & 0b00111111);
|
||||
|
||||
if (c < 0x800) // overlong protection
|
||||
return replacement_character;
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
if (it == end)
|
||||
return replacement_character;
|
||||
|
||||
std::uint32_t c4 = (unsigned char)*it++;
|
||||
|
||||
// 4 bytes (valid: 0x10000 <= codepoint <= 0x10ffff)
|
||||
if (c1 <= 0b11110111) {
|
||||
std::uint32_t c = ((c1 & 0b00000111) << 18) | ((c2 & 0b00111111) << 12) |
|
||||
((c3 & 0b00111111) << 6) | (c4 & 0b00111111);
|
||||
|
||||
if (c < 0x10000) // overlong protection
|
||||
return replacement_character;
|
||||
|
||||
if (0x10ffff < c) // overflow protection
|
||||
return replacement_character;
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
return replacement_character;
|
||||
}
|
||||
|
||||
} // namespace code::unicode
|
35
code/unicode/encoding.hxx
Normal file
35
code/unicode/encoding.hxx
Normal file
@ -0,0 +1,35 @@
|
||||
#ifndef code__unicode__encoding_hxx_
|
||||
#define code__unicode__encoding_hxx_
|
||||
|
||||
#include <cstdint>
|
||||
#include <ostream>
|
||||
|
||||
namespace code::unicode
|
||||
{
|
||||
|
||||
/// Abstract interface for objects that write one code point to a byte
/// stream.
class Encoder
{
public:
  /// Virtual so that implementations can be destroyed through an
  /// `Encoder` pointer or reference.
  virtual ~Encoder() = default;

  /// Write the encoded form of code point `c` to the stream `o`.
  ///
  /// @param o  Stream to write to.
  /// @param c  Code point to encode.
  virtual auto
  encode(std::ostream& o, std::uint32_t c) const -> void = 0;
};
|
||||
|
||||
class Utf8_encoder
|
||||
: public Encoder
|
||||
{
|
||||
public:
|
||||
void
|
||||
encode(std::ostream& o, std::uint32_t c) const override;
|
||||
|
||||
};
|
||||
|
||||
} // namespace code::unicode
|
||||
|
||||
#include <code/unicode/encoding.ixx>
|
||||
|
||||
#endif
|
40
code/unicode/encoding.ixx
Normal file
40
code/unicode/encoding.ixx
Normal file
@ -0,0 +1,40 @@
|
||||
namespace code::unicode
|
||||
{
|
||||
|
||||
inline
|
||||
void
|
||||
Utf8_encoder::
|
||||
encode(std::ostream& o, std::uint32_t c) const
|
||||
{
|
||||
// 1 byte
|
||||
if (c <= 0x7f) {
|
||||
o.put(c);
|
||||
return;
|
||||
}
|
||||
|
||||
// 2 bytes
|
||||
if (c <= 0x7FF) {
|
||||
o.put(0b11000000 | ((c >> 6) & 0b00011111));
|
||||
o.put(0b10000000 | (c & 0b00111111));
|
||||
return;
|
||||
}
|
||||
|
||||
// 3 bytes
|
||||
if (c <= 0xFFFF) {
|
||||
o.put(0b11100000 | ((c >> 12) & 0b00001111));
|
||||
o.put(0b10000000 | ((c >> 6) & 0b00111111));
|
||||
o.put(0b10000000 | (c & 0b00111111));
|
||||
return;
|
||||
}
|
||||
|
||||
// 4 bytes
|
||||
if (c <= 0x10FFFF) {
|
||||
o.put(0b11110000 | ((c >> 18) & 0b00000111));
|
||||
o.put(0b10000000 | ((c >> 12) & 0b00111111));
|
||||
o.put(0b10000000 | ((c >> 6) & 0b00111111));
|
||||
o.put(0b10000000 | (c & 0b00111111));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace code::unicode
|
128
code/unicode/encoding.test.cxx
Normal file
128
code/unicode/encoding.test.cxx
Normal file
@ -0,0 +1,128 @@
|
||||
#include <code/unicode/encoding.hxx>
|
||||
|
||||
#include <code/validation/main.hxx>
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
VALIDATION_TEST(test_1)
|
||||
{
|
||||
std::stringstream str;
|
||||
|
||||
code::unicode::Utf8_encoder utf8;
|
||||
utf8.encode(str, 0);
|
||||
|
||||
auto string = str.str();
|
||||
|
||||
VALIDATION_ASSERT_EQUAL(string.size(), 1U);
|
||||
VALIDATION_ASSERT_EQUAL(string[0], 0);
|
||||
}
|
||||
|
||||
VALIDATION_TEST(test_2)
|
||||
{
|
||||
std::stringstream str;
|
||||
|
||||
code::unicode::Utf8_encoder utf8;
|
||||
utf8.encode(str, 0x7f);
|
||||
|
||||
auto string = str.str();
|
||||
|
||||
VALIDATION_ASSERT_EQUAL(string.size(), 1U);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[0], 0x7f);
|
||||
}
|
||||
|
||||
VALIDATION_TEST(test_3)
|
||||
{
|
||||
std::stringstream str;
|
||||
|
||||
code::unicode::Utf8_encoder utf8;
|
||||
utf8.encode(str, 0x80);
|
||||
|
||||
auto string = str.str();
|
||||
|
||||
VALIDATION_ASSERT_EQUAL(string.size(), 2U);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[0], 0xc2);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[1], 0x80);
|
||||
}
|
||||
|
||||
VALIDATION_TEST(test_4)
|
||||
{
|
||||
std::stringstream str;
|
||||
|
||||
code::unicode::Utf8_encoder utf8;
|
||||
utf8.encode(str, 0x7ff);
|
||||
|
||||
auto string = str.str();
|
||||
|
||||
VALIDATION_ASSERT_EQUAL(string.size(), 2U);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[0], 0xdf);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[1], 0xbf);
|
||||
}
|
||||
|
||||
VALIDATION_TEST(test_5)
|
||||
{
|
||||
std::stringstream str;
|
||||
|
||||
code::unicode::Utf8_encoder utf8;
|
||||
utf8.encode(str, 0x800);
|
||||
|
||||
auto string = str.str();
|
||||
|
||||
VALIDATION_ASSERT_EQUAL(string.size(), 3U);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[0], 0xe0);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[1], 0xa0);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[2], 0x80);
|
||||
}
|
||||
|
||||
VALIDATION_TEST(test_6)
|
||||
{
|
||||
std::stringstream str;
|
||||
|
||||
code::unicode::Utf8_encoder utf8;
|
||||
utf8.encode(str, 0xffff);
|
||||
|
||||
auto string = str.str();
|
||||
|
||||
VALIDATION_ASSERT_EQUAL(string.size(), 3U);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[0], 0xef);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[1], 0xbf);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[2], 0xbf);
|
||||
}
|
||||
|
||||
VALIDATION_TEST(test_7)
|
||||
{
|
||||
std::stringstream str;
|
||||
|
||||
code::unicode::Utf8_encoder utf8;
|
||||
utf8.encode(str, 0x10000);
|
||||
|
||||
auto string = str.str();
|
||||
|
||||
VALIDATION_ASSERT_EQUAL(string.size(), 4U);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[0], 0xf0);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[1], 0x90);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[2], 0x80);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[3], 0x80);
|
||||
}
|
||||
|
||||
VALIDATION_TEST(test_8)
|
||||
{
|
||||
std::stringstream str;
|
||||
|
||||
code::unicode::Utf8_encoder utf8;
|
||||
utf8.encode(str, 0x10ffff);
|
||||
|
||||
auto string = str.str();
|
||||
|
||||
VALIDATION_ASSERT_EQUAL(string.size(), 4U);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[0], 0xf4);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[1], 0x8f);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[2], 0xbf);
|
||||
VALIDATION_ASSERT_EQUAL((unsigned char)string[3], 0xbf);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char* argv[])
|
||||
{
|
||||
return code::validation::main(argc, argv);
|
||||
}
|
124
code/unicode/iterator.hxx
Normal file
124
code/unicode/iterator.hxx
Normal file
@ -0,0 +1,124 @@
|
||||
#ifndef code__unicode__iterator_hxx_
|
||||
#define code__unicode__iterator_hxx_
|
||||
|
||||
#include <code/unicode/decoding.hxx>

#include <cstddef>
#include <cstdint>
#include <iterator>
#include <utility>
|
||||
|
||||
namespace code::unicode
|
||||
{
|
||||
|
||||
/// Input iterator adaptor that presents a sequence of encoded bytes as
/// a sequence of decoded code points.
///
/// The adaptor decodes eagerly: constructing or incrementing it reads
/// the next code point from the underlying iterator and caches it, so
/// the underlying position is always just past the cached code point.
///
/// @tparam Decoder       Type providing
///                       `decode(IteratorType&, IteratorType const&) const`.
/// @tparam IteratorType  Underlying iterator over the encoded bytes.
template<typename Decoder, typename IteratorType>
class Unicode_input_iterator
{
public:
  using decoder_type = Decoder;
  using iterator_type = IteratorType;

  // Standard iterator member types, as consumed by std::iterator_traits.
  using iterator_category = std::input_iterator_tag;
  using value_type = std::uint32_t;
  using difference_type = std::ptrdiff_t;
  using pointer = value_type const*;
  using reference = value_type;

  /// Construct an exhausted iterator with value-initialized state.
  Unicode_input_iterator() = default;

  /// Decode from `it` up to a value-initialized end iterator. Intended
  /// for iterator types whose value-initialized state is the end
  /// sentinel (for example `std::istreambuf_iterator`).
  explicit Unicode_input_iterator(iterator_type it) : it_{ std::move(it) }
  {
    decode();
  }

  /// Decode the range [it, end).
  Unicode_input_iterator(iterator_type it, iterator_type end)
    : it_{ std::move(it) }, end_{ std::move(end) }
  {
    decode();
  }

  /// As the single-iterator constructor, using the supplied decoder.
  Unicode_input_iterator(decoder_type decoder, iterator_type it)
    : decoder_{ std::move(decoder) }, it_{ std::move(it) }
  {
    decode();
  }

  /// Decode the range [it, end) using the supplied decoder.
  Unicode_input_iterator(decoder_type decoder,
                         iterator_type it,
                         iterator_type end)
    : decoder_{ std::move(decoder) },
      it_{ std::move(it) },
      end_{ std::move(end) }
  {
    decode();
  }

  /// Return the cached code point.
  value_type operator*() const
  {
    return unicode_;
  }

  /// Advance to the next code point.
  Unicode_input_iterator&
  operator++()
  {
    decode();
    return *this;
  }

  /// Holds the code point that was current before a post-increment so
  /// that `*it++` yields it.
  struct Proxy
  {
    value_type unicode_;

    value_type operator*() const
    {
      return unicode_;
    }
  };

  /// Advance to the next code point, returning a proxy for the
  /// previous one.
  Proxy
  operator++(int)
  {
    auto unicode = unicode_;
    decode();
    return Proxy{unicode};
  }

  /// Report whether this iterator has reached the underlying position
  /// `other`.
  ///
  /// Because decoding runs one code point ahead, the underlying
  /// iterator already equals the end while the last code point is
  /// still waiting to be read. The comparison therefore only succeeds
  /// once no decoded code point is pending; otherwise a
  /// `for (; it != end; ++it)` loop would drop the final code point.
  bool
  equal(iterator_type const& other) const
  {
    return !pending_ && it_ == other;
  }

private:
  /// Decode the next code point if any input remains, and record
  /// whether `unicode_` now holds a code point that has not been
  /// stepped over yet.
  void
  decode()
  {
    pending_ = it_ != end_;

    if (pending_)
      unicode_ = decoder_.decode(it_, end_);
  }

  // Value-initialize every member so that constructors which do not
  // set `end_` (or anything at all) never leave it indeterminate.
  decoder_type decoder_{};
  iterator_type it_{};
  iterator_type end_{};
  value_type unicode_{};
  bool pending_{ false };
};
|
||||
|
||||
template<typename InputIterator >
|
||||
using Utf8_input_iterator = Unicode_input_iterator<Utf8_decoder, InputIterator>;
|
||||
|
||||
template<typename Decoder, typename IteratorType>
|
||||
bool
|
||||
operator==(
|
||||
Unicode_input_iterator<Decoder, IteratorType> const& lhs,
|
||||
typename Unicode_input_iterator<Decoder, IteratorType>::iterator_type const& rhs)
|
||||
{
|
||||
return lhs.equal(rhs);
|
||||
}
|
||||
|
||||
template<typename Decoder, typename IteratorType>
|
||||
bool
|
||||
operator!=(
|
||||
Unicode_input_iterator<Decoder, IteratorType> const& lhs,
|
||||
typename Unicode_input_iterator< Decoder, IteratorType >::iterator_type const& rhs)
|
||||
{
|
||||
return !lhs.equal(rhs);
|
||||
}
|
||||
|
||||
} // namespace code::unicode
|
||||
|
||||
#endif
|
33
code/unicode/iterator.test.cxx
Normal file
33
code/unicode/iterator.test.cxx
Normal file
@ -0,0 +1,33 @@
|
||||
#include <code/unicode/iterator.hxx>
|
||||
|
||||
#include <code/validation/main.hxx>
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
VALIDATION_TEST(test_1)
|
||||
{
|
||||
std::string utf8{"hࠀ𐀀"};
|
||||
|
||||
code::unicode::Utf8_input_iterator<std::string::iterator> it{
|
||||
utf8.begin(), utf8.end()
|
||||
};
|
||||
|
||||
std::uint32_t c1 = *it++;
|
||||
std::uint32_t c2 = *it++;
|
||||
std::uint32_t c3 = *it++;
|
||||
std::uint32_t c4 = *it++;
|
||||
|
||||
VALIDATION_ASSERT_EQUAL(it, utf8.end());
|
||||
|
||||
VALIDATION_ASSERT_EQUAL(c1, (std::uint32_t)'h');
|
||||
VALIDATION_ASSERT_EQUAL(c2, 0x80U);
|
||||
VALIDATION_ASSERT_EQUAL(c3, 0x800U);
|
||||
VALIDATION_ASSERT_EQUAL(c4, 0x10000U);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char* argv[])
|
||||
{
|
||||
return code::validation::main(argc, argv);
|
||||
}
|
21
code/unicode/iterator.txx
Normal file
21
code/unicode/iterator.txx
Normal file
@ -0,0 +1,21 @@
|
||||
// NOTE(review): this file looks like a stale stub and does not match
// the class declared in iterator.hxx -- confirm whether it can be
// removed:
//   - it opens namespace `unicode`, whereas the class lives in
//     `code::unicode`;
//   - it names `unicode_input_iterator` (lowercase), whereas the class
//     is `Unicode_input_iterator`;
//   - the first definition uses `InputIterator`, which is not one of
//     its template parameters;
//   - the `equal` definition has no template header and calls
//     `is_eof()`, which the class in iterator.hxx does not declare;
//   - `operator*()` has an empty body although it returns a value;
//   - iterator.hxx defines all members inline and does not include
//     this file.
namespace unicode {
|
||||
|
||||
template< typename Decoder, typename IteratorType >
|
||||
unicode_input_iterator< Decoder, InputIterator >::unicode_input_iterator(
|
||||
iterator_type&& it,
|
||||
iterator_type&& end)
|
||||
{}
|
||||
|
||||
template< typename Decoder, typename IteratorType >
|
||||
unicode_input_iterator< Decoder, IteratorType >::value_type
|
||||
unicode_input_iterator< Decoder, IteratorType >::operator*() const
|
||||
{}
|
||||
|
||||
bool
|
||||
unicode_input_iterator< Decoder, IteratorType >::equal(
|
||||
unicode_input_iterator const& other) const
|
||||
{
|
||||
return is_eof() == other.is_eof();
|
||||
}
|
||||
|
||||
} // namespace unicode
|
13
code/unicode/unicode.hxx
Normal file
13
code/unicode/unicode.hxx
Normal file
@ -0,0 +1,13 @@
|
||||
#ifndef code__unicode__unicode_hxx_
|
||||
#define code__unicode__unicode_hxx_
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace code::unicode
|
||||
{
|
||||
|
||||
/// U+FFFD REPLACEMENT CHARACTER, returned in place of input that cannot
/// be decoded. Declared `inline` so that this header-defined constant
/// is a single entity across all translation units.
inline constexpr std::uint32_t replacement_character = 0xFFFD;
|
||||
|
||||
} // namespace code::unicode
|
||||
|
||||
#endif
|
34
code/unicode/version.hxx.in
Normal file
34
code/unicode/version.hxx.in
Normal file
@ -0,0 +1,34 @@
|
||||
#pragma once
|
||||
|
||||
// The numeric version format is AAAAABBBBBCCCCCDDDE where:
|
||||
//
|
||||
// AAAAA - major version number
|
||||
// BBBBB - minor version number
|
||||
// CCCCC - bugfix version number
|
||||
// DDD - alpha / beta (DDD + 500) version number
|
||||
// E - final (0) / snapshot (1)
|
||||
//
|
||||
// When DDDE is not 0, 1 is subtracted from AAAAABBBBBCCCCC. For example:
|
||||
//
|
||||
// Version AAAAABBBBBCCCCCDDDE
|
||||
//
|
||||
// 0.1.0 0000000001000000000
|
||||
// 0.1.2 0000000001000020000
|
||||
// 1.2.3 0000100002000030000
|
||||
// 2.2.0-a.1 0000200001999990010
|
||||
// 3.0.0-b.2 0000299999999995020
|
||||
// 2.2.0-a.1.z 0000200001999990011
|
||||
//
|
||||
#define LIBCODE_UNICODE_VERSION $libcode_unicode.version.project_number$ULL
|
||||
#define LIBCODE_UNICODE_VERSION_STR "$libcode_unicode.version.project$"
|
||||
#define LIBCODE_UNICODE_VERSION_ID "$libcode_unicode.version.project_id$"
|
||||
#define LIBCODE_UNICODE_VERSION_FULL "$libcode_unicode.version$"
|
||||
|
||||
#define LIBCODE_UNICODE_VERSION_MAJOR $libcode_unicode.version.major$
|
||||
#define LIBCODE_UNICODE_VERSION_MINOR $libcode_unicode.version.minor$
|
||||
#define LIBCODE_UNICODE_VERSION_PATCH $libcode_unicode.version.patch$
|
||||
|
||||
#define LIBCODE_UNICODE_PRE_RELEASE $libcode_unicode.version.pre_release$
|
||||
|
||||
#define LIBCODE_UNICODE_SNAPSHOT_SN $libcode_unicode.version.snapshot_sn$ULL
|
||||
#define LIBCODE_UNICODE_SNAPSHOT_ID "$libcode_unicode.version.snapshot_id$"
|
12
manifest
Normal file
12
manifest
Normal file
@ -0,0 +1,12 @@
|
||||
: 1
|
||||
name: libcode-unicode
|
||||
version: 0.1.0-a.0.z
|
||||
language: c++
|
||||
summary: libcode-unicode C++ library
|
||||
license: BSD-4-Clause
|
||||
description-file: README.md
|
||||
url: https://helloryan.se/code/
|
||||
email: ryan@helloryan.se
|
||||
depends: * build2 >= 0.17.0
|
||||
depends: * bpkg >= 0.17.0
|
||||
depends: libcode-validation ^0.1.0-
|
6
repositories.manifest
Normal file
6
repositories.manifest
Normal file
@ -0,0 +1,6 @@
|
||||
: 1
|
||||
summary: libcode-unicode project repository
|
||||
|
||||
:
|
||||
role: prerequisite
|
||||
location: https://code.helloryan.se/code/libcode-validation.git##HEAD
|
8
tests/.gitignore
vendored
Normal file
8
tests/.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
# Test executables.
|
||||
#
|
||||
driver
|
||||
|
||||
# Testscript output directories (can be symlinks).
|
||||
#
|
||||
test
|
||||
test-*
|
4
tests/build/.gitignore
vendored
Normal file
4
tests/build/.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
/config.build
|
||||
/root/
|
||||
/bootstrap/
|
||||
build/
|
5
tests/build/bootstrap.build
Normal file
5
tests/build/bootstrap.build
Normal file
@ -0,0 +1,5 @@
|
||||
project = # Unnamed tests subproject.
|
||||
|
||||
using config
|
||||
using test
|
||||
using dist
|
16
tests/build/root.build
Normal file
16
tests/build/root.build
Normal file
@ -0,0 +1,16 @@
|
||||
cxx.std = latest
|
||||
|
||||
using cxx
|
||||
|
||||
hxx{*}: extension = hxx
|
||||
ixx{*}: extension = ixx
|
||||
txx{*}: extension = txx
|
||||
cxx{*}: extension = cxx
|
||||
|
||||
# Every exe{} in this subproject is by default a test.
|
||||
#
|
||||
exe{*}: test = true
|
||||
|
||||
# The test target for cross-testing (running tests under Wine, etc).
|
||||
#
|
||||
test.target = $cxx.target
|
1
tests/buildfile
Normal file
1
tests/buildfile
Normal file
@ -0,0 +1 @@
|
||||
./: {*/ -build/}
|
Loading…
x
Reference in New Issue
Block a user