Branch data Line data Source code
1 : : // Copyright (C) 2010 Internet Systems Consortium, Inc. ("ISC")
2 : : //
3 : : // Permission to use, copy, modify, and/or distribute this software for any
4 : : // purpose with or without fee is hereby granted, provided that the above
5 : : // copyright notice and this permission notice appear in all copies.
6 : : //
7 : : // THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
8 : : // REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
9 : : // AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
10 : : // INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
11 : : // LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
12 : : // OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
13 : : // PERFORMANCE OF THIS SOFTWARE.
14 : :
15 : : #include <stdint.h>
16 : : #include <cassert>
17 : : #include <iterator>
18 : : #include <string>
19 : : #include <vector>
20 : :
21 : : #include <boost/archive/iterators/base64_from_binary.hpp>
22 : : #include <boost/archive/iterators/binary_from_base64.hpp>
23 : : #include <boost/archive/iterators/transform_width.hpp>
24 : : #include <boost/math/common_factor.hpp>
25 : :
26 : : #include <util/encode/base32hex_from_binary.h>
27 : : #include <util/encode/binary_from_base32hex.h>
28 : : #include <util/encode/base16_from_binary.h>
29 : : #include <util/encode/binary_from_base16.h>
30 : : #include <util/encode/base32hex.h>
31 : : #include <util/encode/base64.h>
32 : :
33 : : #include <exceptions/exceptions.h>
34 : :
35 : : using namespace std;
36 : : using namespace boost::archive::iterators;
37 : :
38 : : namespace isc {
39 : : namespace util {
40 : : namespace encode {
41 : :
42 : : // In the following anonymous namespace, we provide a generic framework
43 : : // to encode/decode baseN format. We use the following tools:
44 : : // - boost base64_from_binary/binary_from_base64: provide mapping table for
45 : : // base64.
46 : : // These classes take another iterator (Base) as a template argument, and
47 : : // their dereference operator (operator*()) first retrieves an input value
48 : : // from Base via Base::operator* and converts the value using their mapping
49 : : // table. The converted value is returned as their own operator*.
50 : : // - base{32hex,16}_from_binary/binary_from_base{32hex,16}: provide mapping
51 : : // table for base32hex and base16. A straightforward variation of their
52 : : // base64 counterparts.
53 : : // - EncodeNormalizer/DecodeNormalizer: supplemental filter handling baseN
54 : : // padding characters (=)
55 : : // - boost transform_width: an iterator framework for handling data stream
56 : : // per bit-group. It takes another iterator (Base) and output/input bit
57 : : // numbers (BitsOut/BitsIn) template arguments. A transform_width object
58 : : // internally maintains a bit stream, which can be retrieved per BitsOut
59 : : // bits via its dereference operator (operator*()). It builds the stream
60 : : // by internally iterating over the Base object via Base::operator++ and
61 : : // Base::operator*, using the least BitsIn bits of the result of
62 : : // Base::operator*. In our usage BitsIn for encoding and BitsOut for
63 : : // decoding are always 8 (# of bits for one byte).
64 : : //
65 : : // Its dereference operator
66 : : // retrieves BitsIn bits from the result of "*Base" (if necessary it
67 : : // internally calls ++Base)
68 : : //
69 : : // A conceptual description of how the encoding and decoding work is as
70 : : // follows:
71 : : // Encoding:
72 : : // input binary data => Normalizer (append sufficient number of 0 bits)
73 : : // => transform_width (extract bit groups from the original
74 : : // stream)
75 : : // => baseXX_from_binary (convert each bit group to an
76 : : // encoded byte using the mapping)
77 : : // Decoding:
78 : : // input baseXX text => Normalizer (convert '='s to the encoded characters
79 : : // corresponding to 0, e.g. 'A's in base64)
80 : : // => binary_from_baseXX (convert each encoded byte into
81 : : // the original group bit)
82 : : // => transform_width (build original byte stream by
83 : : // concatenating the decoded bit
84 : : // stream)
85 : : //
86 : : // Below, we define a set of templated classes to handle different parameters
87 : : // for different encoding algorithms.
88 : : namespace {
89 : : // Common constants used for all baseN encoding.
90 : : const char BASE_PADDING_CHAR = '=';
91 : : const uint8_t BINARY_ZERO_CODE = 0;
92 : :
93 : : // EncodeNormalizer is an input iterator intended to be used as a filter
94 : : // between the binary stream and baseXX_from_binary translator (via
95 : : // transform_width). An EncodeNormalizer object is configured with two
96 : : // iterators (base and base_end), specifying the head and end of the input
97 : : // stream. It internally iterators over the original stream, and return
98 : : // each byte of the stream intact via its dereference operator until it
99 : : // reaches the end of the stream. After that the EncodeNormalizer object
100 : : // will return 0 no matter how many times it is subsequently incremented.
101 : : // This is necessary because the input binary stream may not contain
102 : : // sufficient bits for a full encoded text while baseXX_from_binary expects
103 : : // a sufficient length of input.
104 : : // Note: this class is intended to be used within this implementation file,
105 : : // and assumes "base < base_end" on construction without validating the
106 : : // arguments. The behavior is undefined if this assumption doesn't hold.
107 : : class EncodeNormalizer : public iterator<input_iterator_tag, uint8_t> {
108 : : public:
109 : : EncodeNormalizer(const vector<uint8_t>::const_iterator& base,
110 : : const vector<uint8_t>::const_iterator& base_end) :
111 : 3564 : base_(base), base_end_(base_end), in_pad_(false)
112 : : {}
113 : : EncodeNormalizer& operator++() {
114 [ + - ][ # # ]: 22107 : if (!in_pad_) {
[ + - ][ # # ]
[ + - ][ + - ]
[ # # ][ + - ]
[ + - ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ]
115 : 22107 : ++base_;
116 : : }
117 [ + + ][ # # ]: 22107 : if (base_ == base_end_) {
[ + + ][ # # ]
[ + + ][ + + ]
[ # # ][ + + ]
[ + + ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ]
118 : 41051 : in_pad_ = true;
119 : : }
120 : : return (*this);
121 : : }
122 : : const uint8_t& operator*() const {
123 [ + - ][ + + ]: 22211 : if (in_pad_) {
[ + + ]
124 : : return (BINARY_ZERO_CODE);
125 : : } else {
126 : 22107 : return (*base_);
127 : : }
128 : : }
129 : : bool operator==(const EncodeNormalizer& other) const {
130 : 38502 : return (base_ == other.base_);
131 : : }
132 : : private:
133 : : vector<uint8_t>::const_iterator base_;
134 : : const vector<uint8_t>::const_iterator base_end_;
135 : : bool in_pad_;
136 : : };
137 : :
138 : : // DecodeNormalizer is an input iterator intended to be used as a filter
139 : : // between the encoded baseX stream and binary_from_baseXX.
140 : : // A DecodeNormalizer object is configured with three string iterators
141 : : // (base, base_beginpad, and base_end), specifying the head of the string,
142 : : // the beginning position of baseX padding (when there's padding), and
143 : : // end of the string, respectively. It internally iterators over the original
144 : : // stream, and return each character of the encoded string via its dereference
145 : : // operator until it reaches base_beginpad. After that the DecodeNormalizer
146 : : // will return the encoding character corresponding to the all-0 value
147 : : // (which is specified on construction via base_zero_code. see also
148 : : // BaseZeroCode below). This translation is necessary because
149 : : // binary_from_baseXX doesn't accept the padding character (i.e. '=').
150 : : // Note: this class is intended to be used within this implementation file,
151 : : // and for simplicity assumes "base < base_beginpad <= base_end" on
152 : : // construction without validating the arguments. The behavior is undefined
153 : : // if this assumption doesn't hold.
154 : : class DecodeNormalizer : public iterator<input_iterator_tag, char> {
155 : : public:
156 : : DecodeNormalizer(const char base_zero_code,
157 : : const string::const_iterator& base,
158 : : const string::const_iterator& base_beginpad,
159 : : const string::const_iterator& base_end) :
160 : : base_zero_code_(base_zero_code),
161 : : base_(base), base_beginpad_(base_beginpad), base_end_(base_end),
162 : 22276 : in_pad_(false)
163 : : {
164 : : // Skip beginning spaces, if any. We need do it here because
165 : : // otherwise the first call to operator*() would be confused.
166 : 22276 : skipSpaces();
167 : : }
168 : : DecodeNormalizer& operator++() {
169 : 211188 : ++base_;
170 : 211188 : skipSpaces();
171 [ + + + + : 211188 : if (base_ == base_beginpad_) {
+ + + + +
+ + + ]
172 : 202546 : in_pad_ = true;
173 : : }
174 : : return (*this);
175 : : }
176 : 233464 : void skipSpaces() {
177 : : // If (char is signed and) *base_ < 0, on Windows platform with Visual
178 : : // Studio compiler it may trigger _ASSERTE((unsigned)(c + 1) <= 256);
179 : : // so make sure that the parameter of isspace() is larger than 0.
180 : : // We don't simply cast it to unsigned char to avoid confusing the
181 : : // isspace() implementation with a possible extension for values
182 : : // larger than 127. Also note the check is not ">= 0"; for systems
183 : : // where char is unsigned that would always be true and would possibly
184 : : // trigger a compiler warning that could stop the build.
185 [ + + ][ + + ]: 234378 : while (base_ != base_end_ && *base_ > 0 && isspace(*base_)) {
[ + + ][ + + ]
186 : 914 : ++base_;
187 : : }
188 : 233464 : }
189 : 212493 : const char& operator*() const {
190 [ + + ]: 212493 : if (base_ == base_end_) {
191 : : // binary_from_baseX calls this operator when it needs more bits
192 : : // even if the internal iterator (base_) has reached its end
193 : : // (if that happens it means the input is an incomplete baseX
194 : : // string and should be rejected). So this is the only point
195 : : // we can catch and reject this type of invalid input.
196 [ + - ][ + - ]: 106 : isc_throw(BadValue, "Unexpected end of input in BASE decoder");
197 : : }
198 [ + + ]: 212440 : if (in_pad_) {
199 : 1609 : return (base_zero_code_);
200 : : } else {
201 : 212440 : return (*base_);
202 : : }
203 : : }
204 : : bool operator==(const DecodeNormalizer& other) const {
205 : 178491 : return (base_ == other.base_);
206 : : }
207 : : private:
208 : : const char base_zero_code_;
209 : : string::const_iterator base_;
210 : : const string::const_iterator base_beginpad_;
211 : : const string::const_iterator base_end_;
212 : : bool in_pad_;
213 : : };
214 : :
215 : : // BitsPerChunk: number of bits to be converted using the baseN mapping table.
216 : : // e.g. 6 for base64.
217 : : // BaseZeroCode: the byte character that represents a value of 0 in
218 : : // the corresponding encoding. e.g. 'A' for base64.
219 : : // Encoder: baseX_from_binary<transform_width<EncodeNormalizer,
220 : : // BitsPerChunk, 8> >
221 : : // Decoder: transform_width<binary_from_baseX<DecodeNormalizer>,
222 : : // 8, BitsPerChunk>
223 : : template <int BitsPerChunk, char BaseZeroCode,
224 : : typename Encoder, typename Decoder>
225 : : struct BaseNTransformer {
226 : : static string encode(const vector<uint8_t>& binary);
227 : : static void decode(const char* algorithm,
228 : : const string& base64, vector<uint8_t>& result);
229 : :
230 : : // BITS_PER_GROUP is the number of bits for the smallest possible (non
231 : : // empty) bit string that can be converted to a valid baseN encoded text
232 : : // without padding. It's the least common multiple of 8 and BitsPerChunk,
233 : : // e.g. 24 for base64.
234 : : static const int BITS_PER_GROUP =
235 : : boost::math::static_lcm<BitsPerChunk, 8>::value;
236 : :
237 : : // MAX_PADDING_CHARS is the maximum number of padding characters
238 : : // that can appear in a valid baseN encoded text.
239 : : // It's group_len - chars_for_byte, where group_len is the number of
240 : : // encoded characters to represent BITS_PER_GROUP bits, and
241 : : // chars_for_byte is the number of encoded character that is needed to
242 : : // represent a single byte, which is ceil(8 / BitsPerChunk).
243 : : // For example, for base64 we need two encoded characters to represent a
244 : : // byte, and each group consists of 4 encoded characters, so
245 : : // MAX_PADDING_CHARS is 4 - 2 = 2.
246 : : static const int MAX_PADDING_CHARS =
247 : : BITS_PER_GROUP / BitsPerChunk -
248 : : (8 / BitsPerChunk + ((8 % BitsPerChunk) == 0 ? 0 : 1));
249 : : };
250 : :
251 : : template <int BitsPerChunk, char BaseZeroCode,
252 : : typename Encoder, typename Decoder>
253 : : string
254 : 1782 : BaseNTransformer<BitsPerChunk, BaseZeroCode, Encoder, Decoder>::encode(
255 : : const vector<uint8_t>& binary)
256 : : {
257 : : // calculate the resulting length.
258 : 1782 : size_t bits = binary.size() * 8;
259 [ + + ][ + + ]: 1386 : if (bits % BITS_PER_GROUP > 0) {
260 : 104 : bits += (BITS_PER_GROUP - (bits % BITS_PER_GROUP));
261 : : }
262 : 1782 : const size_t len = bits / BitsPerChunk;
263 : :
264 : 0 : string result;
265 [ + - ][ + - ]: 1782 : result.reserve(len);
[ + - ]
266 : 1782 : result.assign(Encoder(EncodeNormalizer(binary.begin(), binary.end())),
267 : : Encoder(EncodeNormalizer(binary.end(), binary.end())));
268 [ - + ][ - + ]: 1782 : assert(len >= result.length());
[ - + ]
269 [ + - ][ + - ]: 1782 : result.append(len - result.length(), BASE_PADDING_CHAR);
[ + - ]
270 : 1782 : return (result);
271 : : }
272 : :
273 : : template <int BitsPerChunk, char BaseZeroCode,
274 : : typename Encoder, typename Decoder>
275 : : void
276 : 11164 : BaseNTransformer<BitsPerChunk, BaseZeroCode, Encoder, Decoder>::decode(
277 : : const char* const algorithm,
278 : : const string& input,
279 : : vector<uint8_t>& result)
280 : : {
281 : : // enumerate the number of trailing padding characters (=), ignoring
282 : : // white spaces. since baseN_from_binary doesn't accept padding,
283 : : // we handle it explicitly.
284 : 11164 : size_t padchars = 0;
285 : : string::const_reverse_iterator srit = input.rbegin();
286 : : string::const_reverse_iterator srit_end = input.rend();
287 [ + - ][ + + ]: 12949 : while (srit != srit_end) {
[ + + ]
288 : 12827 : char ch = *srit;
289 [ + + ][ + + ]: 12827 : if (ch == BASE_PADDING_CHAR) {
[ + + ]
290 [ + + ][ + + ]: 1720 : if (++padchars > MAX_PADDING_CHARS) {
291 [ + - ][ + - ]: 32 : isc_throw(BadValue, "Too many " << algorithm
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ]
292 : : << " padding characters: " << input);
293 : : }
294 [ + + ][ + + ]: 11106 : } else if (ch < 0 || !isspace(ch)) {
[ + + ][ + + ]
[ + - ][ + + ]
295 : : break;
296 : : }
297 : : ++srit;
298 : : }
299 : : // then calculate the number of padding bits corresponding to the padding
300 : : // characters. In general, the padding bits consist of all-zero
301 : : // trailing bits of the last encoded character followed by zero bits
302 : : // represented by the padding characters:
303 : : // 1st pad 2nd pad 3rd pad...
304 : : // +++===== ======= ===... (+: from encoded chars, =: from pad chars)
305 : : // 0000...0 0......0 000...
306 : : // 0 7 8 15 16.... (bits)
307 : : // The number of bits for the '==...' part is padchars * BitsPerChunk.
308 : : // So the total number of padding bits is the smallest multiple of 8
309 : : // that is >= padchars * BitsPerChunk.
310 : : // (Below, note the common idiom of the bitwise AND with ~7. It clears the
311 : : // lowest three bits, so has the effect of rounding the result down to the
312 : : // nearest multiple of 8)
313 : 11148 : const size_t padbits = (padchars * BitsPerChunk + 7) & ~7;
314 : :
315 : : // In some encoding algorithm, it could happen that a padding byte would
316 : : // contain a full set of encoded bits, which is not allowed by definition
317 : : // of padding. For example, if BitsPerChunk is 5, the following
318 : : // representation could happen:
319 : : // ++00000= (+: from encoded chars, 0: encoded char for '0', =: pad chars)
320 : : // 0 7 (bits)
321 : : // This must actually be encoded as follows:
322 : : // ++======
323 : : // 0 7 (bits)
324 : : // The following check rejects this type of invalid encoding.
325 [ + + ][ - + ]: 9695 : if (padbits > BitsPerChunk * (padchars + 1)) {
326 [ + - ][ + - ]: 20 : isc_throw(BadValue, "Invalid " << algorithm << "padding: " << input);
[ + - ][ + - ]
[ + - ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
327 : : }
328 : :
329 : : // convert the number of bits in bytes for convenience.
330 : 11138 : const size_t padbytes = padbits / 8;
331 : :
332 : : try {
333 : 22276 : result.assign(Decoder(DecodeNormalizer(BaseZeroCode, input.begin(),
334 : : srit.base(), input.end())),
335 : : Decoder(DecodeNormalizer(BaseZeroCode, input.end(),
336 : : input.end(), input.end())));
337 [ # # ][ # # ]: 78 : } catch (const dataflow_exception& ex) {
[ - + ]
338 : : // convert any boost exceptions into our local one.
339 [ # # ][ # # ]: 78 : isc_throw(BadValue, ex.what());
[ # # ][ # # ]
[ # # ][ # # ]
[ - + ][ - + ]
[ - + ]
340 : : }
341 : :
342 : : // Confirm the original BaseX text is the canonical encoding of the
343 : : // data, that is, that the first byte of padding is indeed 0.
344 : : // (DecodeNormalizer and binary_from_baseXX ensure that the rest of the
345 : : // padding is all zero).
346 [ - + ][ - + ]: 8621 : assert(result.size() >= padbytes);
347 [ + + ][ + + ]: 8621 : if (padbytes > 0 && *(result.end() - padbytes) != 0) {
[ + + ][ + + ]
[ + + ][ + + ]
348 [ + - ][ + - ]: 32 : isc_throw(BadValue, "Non 0 bits included in " << algorithm
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
349 : : << " padding: " << input);
350 : : }
351 : :
352 : : // strip the padded zero-bit fields
353 : 9817 : result.resize(result.size() - padbytes);
354 : 9817 : }
355 : :
356 : : //
357 : : // Instantiation for BASE-64
358 : : //
359 : : typedef
360 : : base64_from_binary<transform_width<EncodeNormalizer, 6, 8> > base64_encoder;
361 : : typedef
362 : : transform_width<binary_from_base64<DecodeNormalizer>, 8, 6> base64_decoder;
363 : : typedef BaseNTransformer<6, 'A', base64_encoder, base64_decoder>
364 : : Base64Transformer;
365 : :
366 : : //
367 : : // Instantiation for BASE-32HEX
368 : : //
369 : : typedef
370 : : base32hex_from_binary<transform_width<EncodeNormalizer, 5, 8> >
371 : : base32hex_encoder;
372 : : typedef
373 : : transform_width<binary_from_base32hex<DecodeNormalizer>, 8, 5>
374 : : base32hex_decoder;
375 : : typedef BaseNTransformer<5, '0', base32hex_encoder, base32hex_decoder>
376 : : Base32HexTransformer;
377 : :
378 : : //
379 : : // Instantiation for BASE-16 (HEX)
380 : : //
381 : : typedef
382 : : base16_from_binary<transform_width<EncodeNormalizer, 4, 8> > base16_encoder;
383 : : typedef
384 : : transform_width<binary_from_base16<DecodeNormalizer>, 8, 4> base16_decoder;
385 : : typedef BaseNTransformer<4, '0', base16_encoder, base16_decoder>
386 : : Base16Transformer;
387 : : }
388 : :
389 : : string
390 : 834 : encodeBase64(const vector<uint8_t>& binary) {
391 : 834 : return (Base64Transformer::encode(binary));
392 : : }
393 : :
394 : : void
395 : 7655 : decodeBase64(const string& input, vector<uint8_t>& result) {
396 : 7655 : Base64Transformer::decode("base64", input, result);
397 : 7586 : }
398 : :
399 : : string
400 : 552 : encodeBase32Hex(const vector<uint8_t>& binary) {
401 : 552 : return (Base32HexTransformer::encode(binary));
402 : : }
403 : :
404 : : void
405 : 2055 : decodeBase32Hex(const string& input, vector<uint8_t>& result) {
406 : 2055 : Base32HexTransformer::decode("base32hex", input, result);
407 : 1019 : }
408 : :
409 : : string
410 : 396 : encodeHex(const vector<uint8_t>& binary) {
411 : 396 : return (Base16Transformer::encode(binary));
412 : : }
413 : :
414 : : void
415 : 1454 : decodeHex(const string& input, vector<uint8_t>& result) {
416 : 1454 : Base16Transformer::decode("base16", input, result);
417 : 1212 : }
418 : :
419 : : } // namespace encode
420 : : } // namespace util
421 : 528102 : } // namespace isc
|