Open Chinese Convert 1.3.2
A project for conversion between Traditional and Simplified Chinese
Loading...
Searching...
No Matches
MarisaDict.hpp
1/*
2 * Open Chinese Convert
3 *
4 * Copyright 2020 Carbo Kuo <byvoid@byvoid.com>
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19#pragma once
20
21#include <atomic>
22#include <mutex>
23
24#include "Common.hpp"
25#include "SerializableDict.hpp"
26
27namespace marisa {
28class Trie;
29}
30
31namespace opencc {
36class OPENCC_EXPORT MarisaDict : public Dict, public SerializableDict {
37public:
38 virtual ~MarisaDict() override;
39
40 virtual size_t KeyMaxLength() const override;
41
42 virtual Optional<const DictEntry*> Match(const char* word,
43 size_t len) const override;
44
45 virtual Optional<const DictEntry*> MatchPrefix(const char* word,
46 size_t len) const override;
47
48 virtual std::vector<const DictEntry*> MatchAllPrefixes(
49 const char* word, size_t len) const override;
50
51 virtual LexiconPtr GetLexicon() const override;
52
53 virtual bool SupportsFastPrefixMatch() const override { return true; }
54
55 virtual PrefixMatchView MatchPrefixValue(const char* word,
56 size_t len) const override;
57
58 virtual void SerializeToFile(FILE* fp) const override;
59
63 static MarisaDictPtr NewFromDict(const Dict& thatDict);
64
65 static MarisaDictPtr NewFromFile(FILE* fp);
66
67 static MarisaDictPtr NewFromBuffer(const char* data, size_t size);
68
69 // Exposed for testing only.
70 bool IsLexiconReconstructed() const {
71 return lexiconReconstructed.load(std::memory_order_acquire);
72 }
73
74private:
75 MarisaDict();
76
77 void LoadFromMappedBuffer();
78 void ReconstructLexicon() const;
79
80 mutable size_t maxLength;
81 mutable LexiconPtr lexicon;
82 mutable std::mutex lexiconMutex;
83 mutable std::atomic<bool> lexiconReconstructed;
84 LexiconPtr valuesLexicon;
85
86 class MarisaInternal;
87 std::unique_ptr<MarisaInternal> internal;
88};
89} // namespace opencc
Abstract class of dictionary.
Definition Dict.hpp:63
virtual Optional< const DictEntry * > MatchPrefix(const char *word, size_t len) const override
Matches the longest matched prefix of a word.
Definition MarisaDict.cpp:70
virtual Optional< const DictEntry * > Match(const char *word, size_t len) const override
Matches a word exactly and returns the DictEntry or Optional::Null().
Definition MarisaDict.cpp:54
virtual LexiconPtr GetLexicon() const override
Returns all entries in the dictionary.
Definition MarisaDict.cpp:101
virtual std::vector< const DictEntry * > MatchAllPrefixes(const char *word, size_t len) const override
Returns all matched prefixes of a word, sorted by length in descending order.
Definition MarisaDict.cpp:87
virtual size_t KeyMaxLength() const override
Returns the length of the longest key in the dictionary.
Definition MarisaDict.cpp:49
virtual bool SupportsFastPrefixMatch() const override
Returns true if this dict can handle prefix queries directly without PrefixMatch building a lookup table.
Definition MarisaDict.hpp:53
A class that wraps type T into a nullable type.
Definition Optional.hpp:26
Serializable dictionary interface.
Definition SerializableDict.hpp:32
Result of a PrefixMatch fast-path lookup.
Definition Dict.hpp:52