1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
|
commit 1cd2509ed74ae47965006d16de3c09db029b4efe
Author: Gianfranco Costamagna <costamagnagianfranco@yahoo.it>
Date: Mon Jan 4 23:45:13 2021 +0100
Fix various cmake issues:
"CMAKE_INSTALL_FULL_LIBDIR" not being correctly evaluated and used
pkgconfig directory wrongly set to include instead of lib
cmake directory wrongly set to include instead of lib
core_libname, contrib_libname, PACKAGE_CMAKE_INSTALL_INCLUDEDIR and PACKAGE_CMAKE_INSTALL_LIBDIR variables not being substituted into the cmake.in files
cmake helpers not being correctly set
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 41de688..71dbbd5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,7 +21,7 @@ if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
endif()
set(LIB_DESTINATION
- "${CMAKE_INSTALL_FULL_LIBDIR}" CACHE STRING "Define lib output directory name")
+ "${CMAKE_INSTALL_LIBDIR}" CACHE STRING "Define lib output directory name")
####################################
diff --git a/src/config/CMakeLists.txt b/src/config/CMakeLists.txt
index e5e6624..fe8e8b8 100644
--- a/src/config/CMakeLists.txt
+++ b/src/config/CMakeLists.txt
@@ -1,16 +1,16 @@
####################################
# Set config vars
####################################
-set(core_libname, "lucene++")
-set(contrib_libname, "lucene++-contrib")
+set(core_libname "lucene++")
+set(contrib_libname "lucene++-contrib")
set(
- PACKAGE_CMAKE_INSTALL_INCLUDEDIR,
- "${lucene++_INCLUDE_DIR}/lucene++/")
+ PACKAGE_CMAKE_INSTALL_INCLUDEDIR
+ "${CMAKE_INSTALL_INCLUDEDIR}/lucene++/")
set(
- PACKAGE_CMAKE_INSTALL_LIBDIR,
- "${LIB_INSTALL_DIR}/cmake")
+ PACKAGE_CMAKE_INSTALL_LIBDIR
+ "${LIB_DESTINATION}")
####################################
diff --git a/src/config/contrib/CMakeLists.txt b/src/config/contrib/CMakeLists.txt
index c0dd86f..b4a4391 100644
--- a/src/config/contrib/CMakeLists.txt
+++ b/src/config/contrib/CMakeLists.txt
@@ -9,7 +9,7 @@ if(NOT WIN32)
install(
FILES
"${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contrib.pc"
- DESTINATION "include/pkgconfig")
+ DESTINATION "${LIB_DESTINATION}/pkgconfig")
endif()
@@ -19,7 +19,8 @@ endif()
configure_package_config_file(
"${CMAKE_CURRENT_SOURCE_DIR}/liblucene++-contribConfig.cmake.in"
"${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfig.cmake"
- INSTALL_DESTINATION "${LIB_DESTINATION}/cmake")
+ INSTALL_DESTINATION "${LIB_DESTINATION}/cmake/liblucene++-contrib"
+ PATH_VARS contrib_libname PACKAGE_CMAKE_INSTALL_INCLUDEDIR PACKAGE_CMAKE_INSTALL_LIBDIR)
write_basic_package_version_file(
"${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfigVersion.cmake"
@@ -30,4 +31,4 @@ install(
FILES
"${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfig.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfigVersion.cmake"
- DESTINATION "include/cmake")
+ DESTINATION "${LIB_DESTINATION}/cmake/liblucene++-contrib")
diff --git a/src/config/contrib/liblucene++-contribConfig.cmake.in b/src/config/contrib/liblucene++-contribConfig.cmake.in
index f92f683..85fdfd2 100644
--- a/src/config/contrib/liblucene++-contribConfig.cmake.in
+++ b/src/config/contrib/liblucene++-contribConfig.cmake.in
@@ -20,6 +20,6 @@ if (NOT DEFINED set_and_check)
endif()
-set_and_check(liblucene++-contrib_INCLUDE_DIRS "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@/@contrib_libname@")
-set_and_check(liblucene++-contrib_LIBRARY_DIRS "@PACKAGE_CMAKE_INSTALL_LIBDIR@")
-set(liblucene++-contrib_LIBRARIES "@PACKAGE_CMAKE_INSTALL_LIBDIR@/@contrib_libname@")
+set_and_check(liblucene++-contrib_INCLUDE_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@")
+set_and_check(liblucene++-contrib_LIBRARY_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_LIBDIR@")
+set(liblucene++-contrib_LIBRARIES "@contrib_libname@")
diff --git a/src/config/core/CMakeLists.txt b/src/config/core/CMakeLists.txt
index a3eb17a..65376f5 100644
--- a/src/config/core/CMakeLists.txt
+++ b/src/config/core/CMakeLists.txt
@@ -9,7 +9,7 @@ if(NOT WIN32)
install(
FILES
"${CMAKE_CURRENT_BINARY_DIR}/liblucene++.pc"
- DESTINATION "include/pkgconfig")
+ DESTINATION "${LIB_DESTINATION}/pkgconfig")
endif()
@@ -19,7 +19,8 @@ endif()
configure_package_config_file(
"${CMAKE_CURRENT_SOURCE_DIR}/liblucene++Config.cmake.in"
"${CMAKE_CURRENT_BINARY_DIR}/liblucene++Config.cmake"
- INSTALL_DESTINATION "${LIB_DESTINATION}/cmake")
+ INSTALL_DESTINATION "${LIB_DESTINATION}/cmake/liblucene++"
+ PATH_VARS core_libname PACKAGE_CMAKE_INSTALL_INCLUDEDIR PACKAGE_CMAKE_INSTALL_LIBDIR)
write_basic_package_version_file(
${CMAKE_CURRENT_BINARY_DIR}/liblucene++ConfigVersion.cmake
@@ -30,4 +31,4 @@ install(
FILES
"${CMAKE_CURRENT_BINARY_DIR}/liblucene++Config.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/liblucene++ConfigVersion.cmake"
- DESTINATION "include/cmake")
+ DESTINATION "${LIB_DESTINATION}/cmake/liblucene++")
diff --git a/src/config/core/liblucene++Config.cmake.in b/src/config/core/liblucene++Config.cmake.in
index 89b48a3..574f812 100644
--- a/src/config/core/liblucene++Config.cmake.in
+++ b/src/config/core/liblucene++Config.cmake.in
@@ -20,8 +20,8 @@ if (NOT DEFINED set_and_check)
endif()
-set_and_check(liblucene++_INCLUDE_DIRS "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@/@core_libname@")
-set_and_check(liblucene++_LIBRARY_DIRS "@PACKAGE_CMAKE_INSTALL_LIBDIR@")
-set(liblucene++_LIBRARIES "@PACKAGE_CMAKE_INSTALL_LIBDIR@/@core_libname@")
+set_and_check(liblucene++_INCLUDE_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@")
+set_and_check(liblucene++_LIBRARY_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_LIBDIR@")
+set(liblucene++_LIBRARIES "@core_libname@")
commit 127492f1ab85fd74ab9f9dff0be5b0a63bf7c271
Merge: df65bf5 fd9eaf1
Author: Alan Wright <85800+alanw@users.noreply.github.com>
Date: Tue Dec 29 17:46:39 2020 +0000
Merge pull request #160 from Kakueeen/master
fix a bug of ChineseTokenizer
commit fd9eaf10c49239d700af848062acc1d5efd54aa8
Author: liuzhangjian <liuzhangjian@uniontech.com>
Date: Fri Dec 4 15:41:31 2020 +0800
Title: Fix a bug in ChineseTokenizer
Description: When using ChineseAnalyzer for Chinese word segmentation, adjacent English letters and digits are treated as a single word; I think they should be split into separate words.
RootCause: Null
Solution:
diff --git a/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp b/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp
index d2a19f3..8313445 100644
--- a/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp
+++ b/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp
@@ -38,7 +38,7 @@ bool ChineseFilter::incrementToken() {
if (text.length() > 1) {
return true;
}
- } else if (UnicodeUtil::isOther(text[0])) {
+ } else if (UnicodeUtil::isOther(text[0]) || UnicodeUtil::isDigit(text[0])) {
// One Chinese character as one Chinese word.
// Chinese word extraction to be added later here.
return true;
diff --git a/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp b/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp
index 38bf987..3b4de74 100644
--- a/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp
+++ b/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp
@@ -65,6 +65,7 @@ bool ChineseTokenizer::incrementToken() {
length = 0;
start = offset;
+ bool last_is_en = false, last_is_num = false;
while (true) {
wchar_t c;
@@ -82,11 +83,30 @@ bool ChineseTokenizer::incrementToken() {
c = ioBuffer[bufferIndex++];
}
- if (UnicodeUtil::isDigit(c) || UnicodeUtil::isLower(c) || UnicodeUtil::isUpper(c)) {
+ if (UnicodeUtil::isLower(c) || UnicodeUtil::isUpper(c)) {
+ if (last_is_num) {
+ --bufferIndex;
+ --offset;
+ return flush();
+ }
+
+ push(c);
+ if (length == MAX_WORD_LEN) {
+ return flush();
+ }
+ last_is_en = true;
+ } else if (UnicodeUtil::isDigit(c)) {
+ if (last_is_en) {
+ --bufferIndex;
+ --offset;
+ return flush();
+ }
+
push(c);
if (length == MAX_WORD_LEN) {
return flush();
}
+ last_is_num = true;
} else if (UnicodeUtil::isOther(c)) {
if (length > 0) {
--bufferIndex;
|