506-avr-libc-optimize_dox.patch

   1 diff -Naurp doc/api/main_page.dox doc/api/main_page.dox
   2 --- doc/api/main_page.dox       2013-03-15 12:07:15.000000000 +0530
   3 +++ doc/api/main_page.dox       2013-03-15 12:27:25.000000000 +0530
   4 @@ -329,13 +329,13 @@ compile-time.
   5
   6  \par Wireless AVR devices:
   7
   8 --atmega64rfr2
   9 --atmega644rfr2
  10 --atmega128rfa1
  11 --atmega128rfr2
  12 --atmega1284rfr2
  13 --atmega256rfr2
  14 --atmega2564rfr2
  15 +- atmega64rfr2
  16 +- atmega644rfr2
  17 +- atmega128rfa1
  18 +- atmega128rfr2
  19 +- atmega1284rfr2
  20 +- atmega256rfr2
  21 +- atmega2564rfr2
  22
  23  \par Miscellaneous Devices:
  24
  25 diff -Naurp doc/api/optimize.dox doc/api/optimize.dox
  26 --- doc/api/optimize.dox        1970-01-01 05:30:00.000000000 +0530
  27 +++ doc/api/optimize.dox        2013-03-15 12:27:25.000000000 +0530
  28 @@ -0,0 +1,137 @@
  29 +/* Copyright (c) 2010 Jan Waclawek
  30 +   Copyright (c) 2010 Joerg Wunsch
  31 +   All rights reserved.
  32 +
  33 +   Redistribution and use in source and binary forms, with or without
  34 +   modification, are permitted provided that the following conditions are met:
  35 +
  36 +   * Redistributions of source code must retain the above copyright
  37 +     notice, this list of conditions and the following disclaimer.
  38 +   * Redistributions in binary form must reproduce the above copyright
  39 +     notice, this list of conditions and the following disclaimer in
  40 +     the documentation and/or other materials provided with the
  41 +     distribution.
  42 +
  43 +  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  44 +  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  45 +  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  46 +  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  47 +  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  48 +  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  49 +  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  50 +  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  51 +  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  52 +  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  53 +  POSSIBILITY OF SUCH DAMAGE. */
  54 +
  55 +/* $Id$ */
  56 +
  57 +/** \page optimization Compiler optimization
  58 +
  59 +\section optim_code_reorder Problems with reordering code
  60 +\author Jan Waclawek
  61 +
  62 +Programs contain sequences of statements, and a naive compiler would
  63 +execute them exactly in the order in which they are written. But an
  64 +optimizing compiler is free to \e reorder the statements - or even
  65 +parts of them - if the resulting "net effect" is the same. The
  66 +"measure" of the "net effect" is what the standard calls "side
  67 +effects", and is accomplished exclusively through accesses (reads and
  68 +writes) to variables qualified as \c volatile. So, as long as all
  69 +volatile reads and writes are to the same addresses and in the same
  70 +order (and writes write the same values), the program is correct,
  71 +regardless of other operations in it. (One important point to note
  72 +here is that the time duration between consecutive volatile accesses is
  73 +not considered at all.)
  74 +
  75 +Unfortunately, there are also operations which are not covered by
  76 +volatile accesses. An example of this in avr-gcc/avr-libc are the
  77 +cli() and sei() macros defined in <avr/interrupt.h>, which convert
  78 +directly to the respective assembler mnemonics through the __asm__()
  79 +statement. These don't constitute a variable access at all, not even
  80 +volatile, so the compiler is free to move them around. Although there
  81 +is a "volatile" qualifier which can be attached to the __asm__()
  82 +statement, its effect on (re)ordering is not clear from the
  83 +documentation (and is more likely only to prevent complete removal by
  84 +the optimiser), as it (among other things) states:
  85 +
  86 +<em>Note that even a volatile asm instruction can be moved
  87 +relative to other code, including across jump instructions. [...]
  88 +Similarly, you can't expect a sequence of volatile asm instructions to
  89 +remain perfectly consecutive.</em>
  90 +
  91 +\sa http://gcc.gnu.org/onlinedocs/gcc-4.3.4/gcc/Extended-Asm.html
  92 +
  93 +There is another mechanism which can be used to achieve something
  94 +similar: <em>memory barriers</em>. This is accomplished through adding a
  95 +special "memory" clobber to the inline \c asm statement, and ensures that
  96 +all variables are flushed from registers to memory before the
  97 +statement, and then re-read after the statement. The purpose of memory
  98 +barriers is slightly different than to enforce code ordering: it is
  99 +supposed to ensure that there are no variables "cached" in registers,
 100 +so that it is safe to change the content of registers e.g. when
 101 +switching context in a multitasking OS (on "big" processors with
 102 +out-of-order execution they also imply usage of special instructions
 103 +which force the processor into "in-order" state (this is not the case
 104 +of AVRs)).
 105 +
 106 +Nevertheless, a memory barrier works well in ensuring that all volatile
 107 +accesses before and after the barrier occur in the given order with
 108 +respect to the barrier. However, it does not prevent the compiler from
 109 +moving non-volatile-related statements across the barrier. Peter
 110 +Dannegger provided a nice example of this effect:
 111 +
 112 +\code
 113 +#define cli() __asm volatile( "cli" ::: "memory" )
 114 +#define sei() __asm volatile( "sei" ::: "memory" )
 115 +
 116 +unsigned int ivar;
 117 +
 118 +void test2( unsigned int val )
 119 +{
 120 +  val = 65535U / val;
 121 +
 122 +  cli();
 123 +
 124 +  ivar = val;
 125 +
 126 +  sei();
 127 +}
 128 +\endcode
 129 +
 130 +compiles with optimisations switched on (-Os) to
 131 +
 132 +\verbatim
 133 +00000112 <test2>:
 134 + 112:  bc 01           movw    r22, r24
 135 + 114:  f8 94           cli
 136 + 116:  8f ef           ldi     r24, 0xFF       ; 255
 137 + 118:  9f ef           ldi     r25, 0xFF       ; 255
 138 + 11a:  0e 94 96 00     call    0x12c   ; 0x12c <__udivmodhi4>
 139 + 11e:  70 93 01 02     sts     0x0201, r23
 140 + 122:  60 93 00 02     sts     0x0200, r22
 141 + 126:  78 94           sei
 142 + 128:  08 95           ret
 143 +\endverbatim
 144 +
 145 +where the potentially slow division is moved across cli(),
 146 +resulting in interrupts being disabled longer than intended. Note
 147 +that the volatile access occurs in order with respect to cli() or
 148 +sei(); so the "net effect" required by the standard is achieved as
 149 +intended, it is "only" the timing which is off. However, for most of
 150 +embedded applications, timing is an important, sometimes critical
 151 +factor.
 152 +
 153 +\sa https://www.mikrocontroller.net/topic/65923
 154 +
 155 +Unfortunately, at the moment, there is no mechanism in avr-gcc (nor
 156 +in the C standard) to enforce a complete match between written and
 157 +executed code ordering - except maybe switching the optimization
 158 +completely off (-O0), or writing all the critical code in assembly.
 159 +
 160 +To sum it up:
 161 +
 162 +\li memory barriers ensure proper ordering of volatile accesses
 163 +\li memory barriers don't prevent statements with no volatile accesses from being reordered across the barrier
 164 +
 165 +*/