[FFmpeg-devel] [PATCH] Make HAVE_FAST_UNALIGNED allow unaligned memory accesses
Måns Rullgård
mans
Fri Jul 18 10:00:01 CEST 2008
Michael Niedermayer <michaelni at gmx.at> writes:
> On Fri, Jul 18, 2008 at 02:23:29AM +0100, Mans Rullgard wrote:
>> If HAVE_FAST_UNALIGNED is defined, potentially unaligned data is
>> accessed through normal pointers. Otherwise, compiler-specific
>> code is used to perform unaligned accesses, falling back to
>> byte-wise access if no compiler support is available.
>> ---
>> libavutil/intreadwrite.h | 34 +++++++++++++++++++++++++++++-----
>> 1 files changed, 29 insertions(+), 5 deletions(-)
>>
>> diff --git a/libavutil/intreadwrite.h b/libavutil/intreadwrite.h
>> index 72ad5b3..12dec54 100644
>> --- a/libavutil/intreadwrite.h
>> +++ b/libavutil/intreadwrite.h
>> @@ -23,6 +23,8 @@
>> #include "config.h"
>> #include "bswap.h"
>>
>> +#if !defined(HAVE_FAST_UNALIGNED)
>> +
>> #ifdef __GNUC__
>>
>> struct unaligned_64 { uint64_t l; } __attribute__((packed));
>> @@ -47,7 +49,9 @@ struct unaligned_16 { uint16_t l; } __attribute__((packed));
>> #define AV_WN32(a, b) *((__unaligned uint32_t*)(a)) = (b)
>> #define AV_WN64(a, b) *((__unaligned uint64_t*)(a)) = (b)
>>
>> -#else
>> +#endif
>> +
>> +#else /* !HAVE_FAST_UNALIGNED */
>>
>> #define AV_RN16(a) (*((const uint16_t*)(a)))
>> #define AV_RN32(a) (*((const uint32_t*)(a)))
>> @@ -57,7 +61,7 @@ struct unaligned_16 { uint16_t l; } __attribute__((packed));
>> #define AV_WN32(a, b) *((uint32_t*)(a)) = (b)
>> #define AV_WN64(a, b) *((uint64_t*)(a)) = (b)
>>
>> -#endif /* !__GNUC__ */
>> +#endif /* !HAVE_FAST_UNALIGNED */
>>
>> /* endian macros */
>> #define AV_RB8(x) (((const uint8_t*)(x))[0])
>
> ok
>
>> @@ -66,7 +70,8 @@ struct unaligned_16 { uint16_t l; } __attribute__((packed));
>> #define AV_RL8(x) AV_RB8(x)
>> #define AV_WL8(p, d) AV_WB8(p, d)
>>
>> -#ifdef HAVE_FAST_UNALIGNED
>> +#ifdef AV_RN16
>> +
>> # ifdef WORDS_BIGENDIAN
>> # define AV_RB16(x) AV_RN16(x)
>> # define AV_WB16(p, d) AV_WN16(p, d)
>
> Platforms where HAVE_FAST_UNALIGNED is not set would generally have AV_RN16
> defined, and would thus use things like:
> # define AV_RL16(x) bswap_16(AV_RN16(x))
> # define AV_WL16(p, d) AV_WN16(p, bswap_16(d))
> # define AV_RL32(x) bswap_32(AV_RN32(x))
> # define AV_WL32(p, d) AV_WN32(p, bswap_32(d))
>
> These may be slower than the naive bytewise reading macros on such
> platforms.
For the reverse endian version, this may be the case. It depends on
the CPU and compiler. Some CPUs (e.g. Alpha) have special
instructions for unaligned load/store that are faster than byte-wise
access (especially on the early Alphas without byte-access), but
slower than aligned operations. I'll have to do some tests with the
cross-compilers I have available and see what happens.
For native endian, the compiler (including gcc) will either use
CPU-specific instructions or generate the equivalent of our naive
versions. I would not, however, count on the compiler optimising the
latter using dedicated instructions.
--
Måns Rullgård
mans at mansr.com
More information about the ffmpeg-devel mailing list