libstdc++
codecvt_specializations.h
Go to the documentation of this file.
1 // Locale support (codecvt) -*- C++ -*-
2 
3 // Copyright (C) 2000-2022 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 //
26 // ISO C++ 14882: 22.2.1.5 Template class codecvt
27 //
28 
29 // Written by Benjamin Kosnik <bkoz@redhat.com>
30 
31 /** @file ext/codecvt_specializations.h
32  * This file is a GNU extension to the Standard C++ Library.
33  */
34 
35 #ifndef _EXT_CODECVT_SPECIALIZATIONS_H
36 #define _EXT_CODECVT_SPECIALIZATIONS_H 1
37 
38 #include <bits/c++config.h>
39 #include <locale>
40 #include <iconv.h>
41 
42 namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
43 {
44 _GLIBCXX_BEGIN_NAMESPACE_VERSION
45 _GLIBCXX_BEGIN_NAMESPACE_CXX11
46 
47  /// Extension to use iconv for dealing with character encodings.
48  // This includes conversions and comparisons between various character
49  // sets. This object encapsulates data that may need to be shared between
50  // char_traits, codecvt and ctype.
52  {
53  public:
54  // Types:
55  // NB: A conversion descriptor subsumes and enhances the
56  // functionality of a simple state type such as mbstate_t.
57  typedef iconv_t descriptor_type;
58 
59  protected:
60  // Name of internal character set encoding.
61  std::string _M_int_enc;
62 
63  // Name of external character set encoding.
64  std::string _M_ext_enc;
65 
66  // Conversion descriptor between external encoding to internal encoding.
67  descriptor_type _M_in_desc;
68 
69  // Conversion descriptor between internal encoding to external encoding.
70  descriptor_type _M_out_desc;
71 
72  // The byte-order marker for the external encoding, if necessary.
73  int _M_ext_bom;
74 
75  // The byte-order marker for the internal encoding, if necessary.
76  int _M_int_bom;
77 
78  // Number of external bytes needed to construct one complete
79  // character in the internal encoding.
80  // NB: -1 indicates variable, or stateful, encodings.
81  int _M_bytes;
82 
83  public:
84  explicit
86  : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
87  { }
88 
89  explicit
90  encoding_state(const char* __int, const char* __ext,
91  int __ibom = 0, int __ebom = 0, int __bytes = 1)
92  : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0),
93  _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
94  { init(); }
95 
96  // 21.1.2 traits typedefs
97  // p4
98  // typedef STATE_T state_type
99  // requires: state_type shall meet the requirements of
100  // CopyConstructible types (20.1.3)
101  // NB: This does not preserve the actual state of the conversion
102  // descriptor member, but it does duplicate the encoding
103  // information.
104  encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0)
105  { construct(__obj); }
106 
107  // Need assignment operator as well.
109  operator=(const encoding_state& __obj)
110  {
111  construct(__obj);
112  return *this;
113  }
114 
115  ~encoding_state()
116  { destroy(); }
117 
118  bool
119  good() const throw()
120  {
121  const descriptor_type __err = (iconv_t)(-1);
122  bool __test = _M_in_desc && _M_in_desc != __err;
123  __test &= _M_out_desc && _M_out_desc != __err;
124  return __test;
125  }
126 
127  int
128  character_ratio() const
129  { return _M_bytes; }
130 
131  const std::string
132  internal_encoding() const
133  { return _M_int_enc; }
134 
135  int
136  internal_bom() const
137  { return _M_int_bom; }
138 
139  const std::string
140  external_encoding() const
141  { return _M_ext_enc; }
142 
143  int
144  external_bom() const
145  { return _M_ext_bom; }
146 
147  const descriptor_type&
148  in_descriptor() const
149  { return _M_in_desc; }
150 
151  const descriptor_type&
152  out_descriptor() const
153  { return _M_out_desc; }
154 
155  protected:
156  void
157  init()
158  {
159  const descriptor_type __err = (iconv_t)(-1);
160  const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size();
161  if (!_M_in_desc && __have_encodings)
162  {
163  _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str());
164  if (_M_in_desc == __err)
165  std::__throw_runtime_error(__N("encoding_state::_M_init "
166  "creating iconv input descriptor failed"));
167  }
168  if (!_M_out_desc && __have_encodings)
169  {
170  _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str());
171  if (_M_out_desc == __err)
172  std::__throw_runtime_error(__N("encoding_state::_M_init "
173  "creating iconv output descriptor failed"));
174  }
175  }
176 
177  void
178  construct(const encoding_state& __obj)
179  {
180  destroy();
181  _M_int_enc = __obj._M_int_enc;
182  _M_ext_enc = __obj._M_ext_enc;
183  _M_ext_bom = __obj._M_ext_bom;
184  _M_int_bom = __obj._M_int_bom;
185  _M_bytes = __obj._M_bytes;
186  init();
187  }
188 
189  void
190  destroy() throw()
191  {
192  const descriptor_type __err = (iconv_t)(-1);
193  if (_M_in_desc && _M_in_desc != __err)
194  {
195  iconv_close(_M_in_desc);
196  _M_in_desc = 0;
197  }
198  if (_M_out_desc && _M_out_desc != __err)
199  {
200  iconv_close(_M_out_desc);
201  _M_out_desc = 0;
202  }
203  }
204  };
205 
206  /// encoding_char_traits
207  // Custom traits type with encoding_state for the state type, and the
208  // associated fpos<encoding_state> for the position type, all other
209  // bits equivalent to the required char_traits instantiations.
210  template<typename _CharT>
212  : public std::char_traits<_CharT>
213  {
214  typedef encoding_state state_type;
215  typedef typename std::fpos<state_type> pos_type;
216  };
217 
218 _GLIBCXX_END_NAMESPACE_CXX11
219 _GLIBCXX_END_NAMESPACE_VERSION
220 } // namespace
221 
222 
223 namespace std _GLIBCXX_VISIBILITY(default)
224 {
225 _GLIBCXX_BEGIN_NAMESPACE_VERSION
226 
228 
229  /// codecvt<InternT, _ExternT, encoding_state> specialization.
230  // This partial specialization takes advantage of iconv to provide
231  // code conversions between a large number of character encodings.
232  template<typename _InternT, typename _ExternT>
233  class codecvt<_InternT, _ExternT, encoding_state>
234  : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state>
235  {
236  public:
237  // Types:
238  typedef codecvt_base::result result;
239  typedef _InternT intern_type;
240  typedef _ExternT extern_type;
242  typedef state_type::descriptor_type descriptor_type;
243 
244  // Data Members:
245  static locale::id id;
246 
247  explicit
248  codecvt(size_t __refs = 0)
250  { }
251 
252  explicit
253  codecvt(state_type& __enc, size_t __refs = 0)
255  { }
256 
257  protected:
258  virtual
259  ~codecvt() { }
260 
261  virtual result
262  do_out(state_type& __state, const intern_type* __from,
263  const intern_type* __from_end, const intern_type*& __from_next,
264  extern_type* __to, extern_type* __to_end,
265  extern_type*& __to_next) const;
266 
267  virtual result
268  do_unshift(state_type& __state, extern_type* __to,
269  extern_type* __to_end, extern_type*& __to_next) const;
270 
271  virtual result
272  do_in(state_type& __state, const extern_type* __from,
273  const extern_type* __from_end, const extern_type*& __from_next,
274  intern_type* __to, intern_type* __to_end,
275  intern_type*& __to_next) const;
276 
277  virtual int
278  do_encoding() const throw();
279 
280  virtual bool
281  do_always_noconv() const throw();
282 
283  virtual int
284  do_length(state_type&, const extern_type* __from,
285  const extern_type* __end, size_t __max) const;
286 
287  virtual int
288  do_max_length() const throw();
289  };
290 
291  template<typename _InternT, typename _ExternT>
292  locale::id
294 
295  // This adaptor works around the signature problems of the second
296  // argument to iconv(): SUSv2 and others use 'const char**', but glibc 2.2
297  // uses 'char**', which matches the POSIX 1003.1-2001 standard.
298  // Using this adaptor, g++ will do the work for us.
299  template<typename _Tp>
300  inline size_t
301  __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*),
302  iconv_t __cd, char** __inbuf, size_t* __inbytes,
303  char** __outbuf, size_t* __outbytes)
304  { return __func(__cd, (_Tp)__inbuf, __inbytes, __outbuf, __outbytes); }
305 
306  template<typename _InternT, typename _ExternT>
307  codecvt_base::result
309  do_out(state_type& __state, const intern_type* __from,
310  const intern_type* __from_end, const intern_type*& __from_next,
311  extern_type* __to, extern_type* __to_end,
312  extern_type*& __to_next) const
313  {
314  result __ret = codecvt_base::error;
315  if (__state.good())
316  {
317  const descriptor_type& __desc = __state.out_descriptor();
318  const size_t __fmultiple = sizeof(intern_type);
319  size_t __fbytes = __fmultiple * (__from_end - __from);
320  const size_t __tmultiple = sizeof(extern_type);
321  size_t __tbytes = __tmultiple * (__to_end - __to);
322 
323  // Argument list for iconv specifies a byte sequence. Thus,
324  // all to/from arrays must be brutally casted to char*.
325  char* __cto = reinterpret_cast<char*>(__to);
326  char* __cfrom;
327  size_t __conv;
328 
329  // Some encodings need a byte order marker as the first item
330  // in the byte stream, to designate endian-ness. The default
331  // value for the byte order marker is NULL, so if this is
332  // the case, it's not necessary and we can just go on our
333  // merry way.
334  int __int_bom = __state.internal_bom();
335  if (__int_bom)
336  {
337  size_t __size = __from_end - __from;
338  intern_type* __cfixed = static_cast<intern_type*>
339  (__builtin_alloca(sizeof(intern_type) * (__size + 1)));
340  __cfixed[0] = static_cast<intern_type>(__int_bom);
341  char_traits<intern_type>::copy(__cfixed + 1, __from, __size);
342  __cfrom = reinterpret_cast<char*>(__cfixed);
343  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
344  &__fbytes, &__cto, &__tbytes);
345  }
346  else
347  {
348  intern_type* __cfixed = const_cast<intern_type*>(__from);
349  __cfrom = reinterpret_cast<char*>(__cfixed);
350  __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes,
351  &__cto, &__tbytes);
352  }
353 
354  if (__conv != size_t(-1))
355  {
356  __from_next = reinterpret_cast<const intern_type*>(__cfrom);
357  __to_next = reinterpret_cast<extern_type*>(__cto);
358  __ret = codecvt_base::ok;
359  }
360  else
361  {
362  if (__fbytes < __fmultiple * (__from_end - __from))
363  {
364  __from_next = reinterpret_cast<const intern_type*>(__cfrom);
365  __to_next = reinterpret_cast<extern_type*>(__cto);
366  __ret = codecvt_base::partial;
367  }
368  else
369  __ret = codecvt_base::error;
370  }
371  }
372  return __ret;
373  }
374 
375  template<typename _InternT, typename _ExternT>
376  codecvt_base::result
378  do_unshift(state_type& __state, extern_type* __to,
379  extern_type* __to_end, extern_type*& __to_next) const
380  {
381  result __ret = codecvt_base::error;
382  if (__state.good())
383  {
384  const descriptor_type& __desc = __state.in_descriptor();
385  const size_t __tmultiple = sizeof(intern_type);
386  size_t __tlen = __tmultiple * (__to_end - __to);
387 
388  // Argument list for iconv specifies a byte sequence. Thus,
389  // all to/from arrays must be brutally casted to char*.
390  char* __cto = reinterpret_cast<char*>(__to);
391  size_t __conv = __iconv_adaptor(iconv,__desc, 0, 0,
392  &__cto, &__tlen);
393 
394  if (__conv != size_t(-1))
395  {
396  __to_next = reinterpret_cast<extern_type*>(__cto);
397  if (__tlen == __tmultiple * (__to_end - __to))
398  __ret = codecvt_base::noconv;
399  else if (__tlen == 0)
400  __ret = codecvt_base::ok;
401  else
402  __ret = codecvt_base::partial;
403  }
404  else
405  __ret = codecvt_base::error;
406  }
407  return __ret;
408  }
409 
410  template<typename _InternT, typename _ExternT>
411  codecvt_base::result
412  codecvt<_InternT, _ExternT, encoding_state>::
413  do_in(state_type& __state, const extern_type* __from,
414  const extern_type* __from_end, const extern_type*& __from_next,
415  intern_type* __to, intern_type* __to_end,
416  intern_type*& __to_next) const
417  {
418  result __ret = codecvt_base::error;
419  if (__state.good())
420  {
421  const descriptor_type& __desc = __state.in_descriptor();
422  const size_t __fmultiple = sizeof(extern_type);
423  size_t __flen = __fmultiple * (__from_end - __from);
424  const size_t __tmultiple = sizeof(intern_type);
425  size_t __tlen = __tmultiple * (__to_end - __to);
426 
427  // Argument list for iconv specifies a byte sequence. Thus,
428  // all to/from arrays must be brutally casted to char*.
429  char* __cto = reinterpret_cast<char*>(__to);
430  char* __cfrom;
431  size_t __conv;
432 
433  // Some encodings need a byte order marker as the first item
434  // in the byte stream, to designate endian-ness. The default
435  // value for the byte order marker is NULL, so if this is
436  // the case, it's not necessary and we can just go on our
437  // merry way.
438  int __ext_bom = __state.external_bom();
439  if (__ext_bom)
440  {
441  size_t __size = __from_end - __from;
442  extern_type* __cfixed = static_cast<extern_type*>
443  (__builtin_alloca(sizeof(extern_type) * (__size + 1)));
444  __cfixed[0] = static_cast<extern_type>(__ext_bom);
445  char_traits<extern_type>::copy(__cfixed + 1, __from, __size);
446  __cfrom = reinterpret_cast<char*>(__cfixed);
447  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
448  &__flen, &__cto, &__tlen);
449  }
450  else
451  {
452  extern_type* __cfixed = const_cast<extern_type*>(__from);
453  __cfrom = reinterpret_cast<char*>(__cfixed);
454  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
455  &__flen, &__cto, &__tlen);
456  }
457 
458 
459  if (__conv != size_t(-1))
460  {
461  __from_next = reinterpret_cast<const extern_type*>(__cfrom);
462  __to_next = reinterpret_cast<intern_type*>(__cto);
463  __ret = codecvt_base::ok;
464  }
465  else
466  {
467  if (__flen < static_cast<size_t>(__from_end - __from))
468  {
469  __from_next = reinterpret_cast<const extern_type*>(__cfrom);
470  __to_next = reinterpret_cast<intern_type*>(__cto);
471  __ret = codecvt_base::partial;
472  }
473  else
474  __ret = codecvt_base::error;
475  }
476  }
477  return __ret;
478  }
479 
480  template<typename _InternT, typename _ExternT>
481  int
482  codecvt<_InternT, _ExternT, encoding_state>::
483  do_encoding() const throw()
484  {
485  int __ret = 0;
486  if (sizeof(_ExternT) <= sizeof(_InternT))
487  __ret = sizeof(_InternT) / sizeof(_ExternT);
488  return __ret;
489  }
490 
491  template<typename _InternT, typename _ExternT>
492  bool
493  codecvt<_InternT, _ExternT, encoding_state>::
494  do_always_noconv() const throw()
495  { return false; }
496 
497  template<typename _InternT, typename _ExternT>
498  int
499  codecvt<_InternT, _ExternT, encoding_state>::
500  do_length(state_type&, const extern_type* __from,
501  const extern_type* __end, size_t __max) const
502  { return std::min(__max, static_cast<size_t>(__end - __from)); }
503 
504  // _GLIBCXX_RESOLVE_LIB_DEFECTS
505  // 74. Garbled text for codecvt::do_max_length
506  template<typename _InternT, typename _ExternT>
507  int
508  codecvt<_InternT, _ExternT, encoding_state>::
509  do_max_length() const throw()
510  { return 1; }
511 
512 _GLIBCXX_END_NAMESPACE_VERSION
513 } // namespace
514 
515 #endif
constexpr const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:230
ISO C++ entities toplevel namespace is std.
GNU extensions for public use.
Basis for explicit traits specializations.
Definition: char_traits.h:330
Common base for codecvt functions.
Definition: codecvt.h:73
Primary class template codecvt.
Definition: codecvt.h:279
virtual result do_out(state_type &__state, const intern_type *__from, const intern_type *__from_end, const intern_type *&__from_next, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const
Convert from internal to external character set.
const _CharT * c_str() const noexcept
Return const pointer to null-terminated contents.
Definition: cow_string.h:2206
size_type size() const noexcept
Returns the number of characters in the string, not including any null-termination.
Definition: cow_string.h:913
Facet ID class.
Class representing stream positions.
Definition: postypes.h:83
Extension to use iconv for dealing with character encodings.