de la radio logicielle à la radio intelligente

Transcription

*
/
2
,
+,
! "#$%
-$
,$
.0 1 ,
&
%
%
0
" 3#$ 4" %
,
#0
"%
* 6
,$
'0# %"
"
"
, ' 7$ #
'
6 8'
6
9
$#
'
"
)
.
( '
5
)
)
, ')
6
5
"
, '
'3 " : :;
()
)
'-
$
"#0 -
504
-
"
, '< 0 '#0.
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
'-
504
$
"#0 -
-
.$
,$
# #$
03
"055 $
10""$ 1
#$
$
'
9
>
1$ $
!
'-
1 "
=
;
>
9
> 6
9
;
;
;=
;;
@
>
:
?
?
?
?
?
>
92
>
>
#>&
+
;
;
:
:
:
:
:
:
B
B
B
"
A
9
( > 6
>
@
@
" A
"
, '
9
:
:
:
:=
:;
:@
B
'- "
'-
/
=
;
@
'
'-
' +
5
9
> 6
A
C
(
>&
9
2
A
?
?
?
'
A
A
A
A
A
=
;
@
:
2
2
"
, '
5
?
>
6
?
?
?
?
?
?
?
'>
=
;
@
:
B
.
?
??
?
?
'
'
'
5
9
'
'
&
&
6
2
>
>
9
'
2
A
=
;
A
A
D
> >
> >
"
, '< 0 '#0.
5
>
;
@
@
@
:
:
:
B
B
B
=
'-
$
"#0 -
504
-
#$
#
E
F1
$
=
9
=
=
;
'
;
;
;=
@
,
@
@
:
B
#
5 >
>
(6
+
!
!
,
> >
> >
>6
'
2
2
;
1$ $
!
'-
1 "
'-
'- "
'=
==
=@
=@
=:
=B
=B
@@
B=
?
?
B
9
<
?
.$
,$
# #$
03
"
, '< 0 '#0.
=
;
@
;B
B@
B@
BB
'-
$
"#0 -
504
-
#$
10""$
"
1
, '< 0 '#0.
.$
,$
# #$
03
'-
1$ $
!
1 "
'-
'- "
'-
@
'-
504
$
"#0 -
-
!"
.$
,$
# #$
03
# !$#
1$ $
!
1 "
%&" ' #
'-
"
'- "
"
'
( &
'
>
,9
2
6
>
<
1
>&
<
>
( &
−
−
−
7
<
6 7
>
7
> 6
,
6 <
2
2
&
( &
6
9
>
,9
9
6
< 6
G

< 6
+
A
?
>
9
9
,
5
J
&
9
.
9
6
&
9
>
<
&6
<
,
> 6
/ !
/ !
&
9
9
A
6
"$
'
6
9
A9
>&
<
&
&
6
*9
??:)<
%

"$
'
J
6
#
6
+
9
2
<
5
>&
<
A 2
M
( &
*9
>
>
6
K
2
?
2
'9
6
&
&
>
6
>
2
6
9
H
>
9
>
K
6
> >
>
A9
&
#
9
>
,
2
6
5
2
< 9
9
'9
A %
,9 (
( &
A
6
,
"$
'
2
2
<
N
<
:
"
, '< 0 '#0.
)
9
6
<
<
9
G
6
<
'-
504
$
"#0 -
-
N#
.$
,$
# #$
03
1$ $
!
1 "
'-
<
,
( &
2 2
9
1
6
> > < >
A
+
> 2
>
6
6
(2
<
2
6
#
&
>
&6
9
>
&
&
9
>
< 9
9
'9
'- "
<
6
>
( &
,
+K
6
>
6
+
6
6 6
,
>
( &
K
(
6
<
A 2
<
> >
9 &
+
,
>
5
O
<
2
9
> 2
6
9
6
9
*
>
9
>
%
)< 9
6 6
(
2 K
%
9
6 )
<
J
9
,
(
%
5
%
9
+1" )
9
+
6
+
>
9
<
9
)<
9
>
9
9
6
2&
>
9
%
M
( &
&
9
6
K
<
6
)
'
6
'9
6
9&
,
6
*
9
2 6
A
>
P *92
>
6
2
6
> 2
>
> P *
69
9
Q
6
( &
%
9>
9
6
6
.
> ( &
>6
9>
9

N)<
6
$
(
>6
6
%
9
>
>
D
2
"
> 6
9
9
9>
<
6
)<
, '< 0 '#0.
+
6
9
+ +
A
B
'-
$
"#0 -
504
-
.$
,$
# #$
03
'
1$ $
!
>
( 6 <
6
2
6
>
1 "
'-
> $
&
6
2
>
'- "
<
<
O
6
9
*9
<
6
A
2
6 <
6
2
<
$
2
"
, '< 0 '#0.
<
A
'-
504
$
"#0 -
(
-
!
#
1 "
'-
'- "
&
!"&
&
504
'>
>
:
?B
/
D
5
<
2
1
3
"
/
A
"
"
!
6
??@+ ???
"
6
6
6
9
$#
8'
"
5
, '
: :;
) $&
#>&
1
9
$
3"
???
S
"
1
A
#
1
*
5
??@
?? R ??@
??
, '
, 'R'
'
((
1
1
.
'
<
6 <
#
3
6
1"7/#(
T
'
> 6 + 5
,
2
;<@ !-L
#>&
!> U , V $
3
* 6
'$
#
3
( '
!0 # ,
1< ,
. # ,<
> 431$ -<
>
1
$
"< 1
' "# , $
3< * 6
'$
# 3 <
!> U , V $
3
# & >
/
A (
> >
O
9
6
9
$
3"
O
9
9
$
3"
"
'
(*
1
1
1$ $
!
"!%
!$ ' &
(
3
.$
,$
# #$
03
& "!
???
@
"( &
-(
#
+
%
$
@
> > 5
>
"
7>
(,
# !$#
.
&" ' #
>
, '<
"
$
# +#',
"
6
−
−
>
:
%
−
−
−
<
=
=
)<
<
<
;
&
2
<
−
&
2
<
−
;
9
&
2
<
−
6
"
, '< 0 '#0.
&
<
?
'-
$
"#0 -
504
−
−
−
.$
,$
# #$
03
1$ $
!
&
>
>
A
?
−
3#,
: +
−
−
−
=
;
:
1 "
'-
'- "
2<
><
>
&
)
+
A (
%
A
3 #
2
:
>&
5
B<
/
9
9
−
−
A
2 92
9
5
−
,$
03
3#
9
/
+
A
"
9
#
−
−
−
> 6
5
>
"1
/
???
=
−
−
−
−
;
A
=
3 #
9
3#,
>
(
6
A
6
!
>
&
$
"$
"
%
)
O $
'
: &
9
5 M
'
2
"
%
−
'
%
:+
−
S
$
3"
−
&
B
+
=&
% ??:+ ?? )
'>
'
"
'
?)
(
"
'
"
1
6
−
(%
/'
#
21
#
2
,
S
5
S
$
3"
"
"' #
=&
'3 " : B@ % 2+$ # ) % ?? )
S1$ %
)
??
(
'
1
. $
$ /
!0/$ &
# 1 2
3
! (44-5
% >7 )
− "
9
6
6 %=>7 )
−
6
6 %:>7 )
−
6 >
6
%:>7 )
− "
%:>7 )
% ;>7 )
"
, '< 0 '#0.
6
9
$
3"
9
$
3"
??B
−
&
)
'-
504
$
"#0 -
−
−
6
6
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
6 %= >7 )
6
%; >7 )
>
A
−
2&
. $
/$ &
'
'
#
2
A
% ( >&
=
)
#
1
%;<@>7 )
− '
% ;>7 )
−
1"
' $
−
,
=
"
&
3
>
9
> %
<
%
)
! (44-5
> >
&
2
A
)
"1
6
) $&
%
/'
#
'>
A
2
( &
>
/
;
"
! 3
! (4465
−
,
%;<@>7 )
− '
" ' %=>7 )
− /
+
- /$ &
%
2
6
% ;B>)
−
5
# 1%
# '
3
%
>
%
:
$&
/ 5
>) R
2
A
−
−
−
−
#
,
>
6
+
%B >)
6
A
6
A
'
6
'
%
>)
R
%; >) +
% @>) R
A
"
−
9
1
"
, '< 0 '#0.
9
$
3"
'-
$
"#0 -
504
-
*
$ #
"
"
*9
.$
,$
# #$
03
7!
&" ' #
K
< 96
"
96
"'
, '7$ #
"
#>
'
6
C
%"
*
>
>
>
< '
6
1$ $
!
>
1 "
'-
'- "
0
>
5
6
>
$
# <
6
)
,$
'0#
>
%"
C
R
)
2
>
>
>
−
−
−
−
>
(
>
<
>
9 >
% 2
)<
−
−
#
6 <
<
<
6
−
+
>
<
>
>
<
/$
5 >
−
>
+
>
&
<
−
<
−
+
>
%
6
"( 1 2
,
"&
,
1
>
>
)<
>
5,
$
&" ' #
"
"
/
' +
1
#
>&
3
, #'-$
1*0
>&
>
>
1
>&
' +
,
? W
"
#>
* 6
,$
'0#
/
, / 4
'>
> 504
< W"
, '
, '< 0 '#0.
6
W
@ W
= W
'-
$
"#0 -
1
504
B
' +
1
#
1
' +
,
1
:
' +
1
#
1
' +
1
@
' +
1
#
1
' +
=+
B
.$
,$
# #$
03
1$ $
!
1 "
>&
5 > 3 E$
3
>&
0
B
"
"! &% ) &$ 8 9 +
" +.! &;%
8
8
+8&
&
>&
E
" 1
: W
'>
> 504
; W
@ W 3#
"
<@ W"
, '
>&
$ &
$$! "&
>&
>&
0
"
#
'-
'- "
: &$ "&%
%
:
+
&
! ,
:
%
&
&
* 6
'>
"
." %
%
,$
'0#
> 504
!% < &
W
W
: /$
&
>&
0
, U !01
1
>&
@
"
"!
" +.! &;%
# # ./
1
# & " 7!
;!#
! %
&
" +.! &
#"
&
%
: /$
%
%.
>&
* 6
,$
'0#
@W
/
30 E ,
@W
'>
> 504
W
>&
*
+ >
1 , - 4
B "
, '<
6
1
>&
0
=
#
%
& <) $
# # ./
" +.! &;% =
%"&
%
& &
." %
%
* ( 5 >
! $
3<
! ,03'-< *
+/
D
- ,
,
4< * 6
,$
'0#< *
+,
-$
,$ < .
!
3
1
>&
* 6
,$
'0#
:@W
' +
*
+/
D - ,
1 @W
'>
> 504
= W
5
# & >
A
# >
'
"1 /
9@<
> < "
&
>&
$
S
( &
' ,
%'
9
6
9
1! ,S
' ,
6
2
%
&
&
' +
"
, '< 0 '#0.
1
1<
) R
=
'-
$
"#0 -
+
504
:
-
<
*
1
#
"
'-
'- "
&
1 ,0 5
9
$
3"
" +.! &
# &! !
B
: &$ 7!
!"
)
&
! "
0
3 /T1
#
9
#&% &
.
#$
A
>
#
&
:
'
+
E
1
01 $
!
9
.
$
%
%.
A
9
&
&
X (< "
"
(< T
< !"
& &$#
&
5
< "
B
'
&
,*
"(
+
<
+
5
:
"
#
C
*
T
> - '- 5 3$
5
9
& ;!%
% &
9
%
%.
, '< 0 '#0.
9
&
( Z
+"
$
1
9
/
"
"
: /$
'
&.
:
?
B
<
Z
'
Z
>
%
%.
V
+
#
"
&
! %
: &.
! %
& &
9
$
$
"1 /
9B< 1
7> >
"
, '
%
@
;
1 "
>&
5 X Y
, #
:
9
$
3"
<
6
>&
3
$ &
#$
&
%
&
#
%.
!
$;& 7!# !
"
"!
& &%
%
/%
* (
2 .$
"$ F< 0
1 /0 ! "< '>
> 504< 5 >
$
31 E0$
3 < *
+,
-$
,$ < *
> 03"$
3< 0
" 3#$ 4"< 4 "0 ,
1
>&
0
1 /0 ! "
@ W
' +
4 "0 ,
@W
' +
'>
> 504
;@W
5
# & >
A
*
"
R
1
5 >
1"
"(
:
& >&
$
> >
9
$ # 7$
3"
>
+
>
&
6
B+
@+
1$ $
!
' +
"
1
#
,(
:+
.$
,$
# #$
03
><
'-
$
"#0 -
504
5 X Y
, #
"
9
9
#
#
%.
& %"&
%
&
<+ $
2
! 4"
9
%"&
& +
A
"
#
5
"
#
', E
1
??B
=
5
"
#
=
!
"
#
$
"
#
'-
'- "
$
3"
"
% ." %
%
$!% <
9
)
$
" 5
/ $
!"
! %
60
$& # %
%
: &. !
A%
!
%
!$# 7!
!
6(
$ !
#
! "& &%
+
+
!
<
+%3
D
2 "
??
5
/
"
#
1 "
$
/"$
'<
< #%
;!#
! % "
#
! #"
! &
', [ $
3
9
9
! @ " "
!$# 7! C
&' ".
+
;
!
!
'0"[
1
9
$
3"
&$ &.
+#7!
%
%
! %&
'
"
#
1$ $
!
$
9
#"
"
??
9
$
.$
,$
# #$
03
$
3"
#&% &
D 1 # &%$
&$
" &.
$ !%
%
%
# 7! !
5
)
B
! #$
"
! F &% < & &$&
!
E@
/
. 3
9
9
& $
#%
#" $$! "&
, #
9
"
%
B '
" "
$
3"
+
%
!0
* /$
' #
!
: /$
, /$ .# # &
Y5
-
9
$
5 33
1 ""<
$ &
" $$! "&
"
$
3"
& "
"!
)
"<
. :
0
%"&
: /$
#%
#" $$! "&
%$ %
& &
!$# 7! ! " $
, '< 0 '#0.
&%
.
&
)
&
&!0
$
@
'-
$
"#0 -
504
-
,,
A
>
9
B
9
F
, .0 1
"
! %
/
#
@
1
#
-
% "%
%
: "
9
!
&
1 "
'-
'- "
7!
, '
&
&%
% .
." %
<
# &;%
!$ $
$ &
: /$
-
&
." %
&
"
"
! %
&
& &
"
&
, '
5 - "9
'
3
>
(
3#
?
X
#
(%3# )< "
" 1
B
C '
9
RB
'
#> !
, U !01
>
E
2
-(
!% <
&
""! %
"
:
1$ $
!
"
: /$
$&
"& .
B
.$
,$
# #$
03
"
1
9
'
&
Z##
9
2
1 %# & .
9
1
9 "$
T 55,
:
92 ,
'>
<
+5
%#
)<
Z$
##
3 Z'05<
@
B
@
*
+ >
1 , - 4
9
.
<
9 '
@A
R @
'
6
( > 6
'
%
! ,03'@
&; % # " %
%" '
6
<#
!
!
!$#
#"&%
'!
#
3
1
,
?
A
' +
:
*
+
- 3 31 V
'>
> 504 "
, '
,
104,
#
('
4X
>" 3 1
T
"
, '< 0 '#0.
1
%$
(
)
# X( %*
)
')<
'-
$
"#0 -
504
-
6(
$;
"$ #
&%
&
'
C '
SB
$
Z
A
$
'## 9
6*
6,
;A
:
#>
(
*
"
"
5
#
< =
$
(
<
"
&%
" &
1
>
!:
#
>
Z
A
'
<
3 C X
:
'
<
>
<
/
>&
/
"
5
#
1
"
5
#
< /
" +
#
.
(
#- ,0#
B
: 9
$
3"
6
#
%.
"
&! < ' &!
!
"
"!
" +.! &;%
: &$ 7! $
5 >
! $
3<
' ""
< '>
> 504<
/
30 E ,< 1
6 -0 V #< ,
#0
"
,, 0
:
#
:
9
$
3"
6
#
B
:
/
"
< "( < B+
&
*
:
>
'- "
0
< 0
1
/
1'< " < :+=
"1 "(
'
0
< 5(
<! & < +
"
"
< "
'>
L
" C
1
/
# >
" < ;+
@
"
C
S:
'-
" +
#
'
#
1 "
'
" C
Z >
"
'
'05
"
"1 9@
5
7!
#
"1 9
C '
1$ $
!
'
$
&
'
"
'
3 C X
+= \
B
=
'
1
.$
,$
# #$
03
$
&
%
&
! %
&
"
: )
( *
+,
1 T 4" < 1
'>
> 504< 5 >
#0
"
0 F ,
@
#% &
# # ./
=
9
6
& &"# &
%"&
1%
&
, '< 0 '#0.
B
B
$&
%"&
!
" +.! &;%
6 -0 V #< 3 >
* ,$ 3<
$
31 E0$
3 <
" 33< ,
:
6
"
#% &
@
$$&
: /$
.
B
!
&
"
,
%
& <) $
." %
%
B
'-
$
"#0 -
504
*
:
( '>
31
504< /
5 X Y
"
5
#
*
(
< * Y '- 5
30 E ,< *
:
&
'
C '
Z9B
$
'## 9:
'
1
$
! &
"$ "9@
G
& "%
&%
'!
<
<
"
M
'
< "
"
M
5
'
'
1
"(
'
#
>
/
#>
< '
<
<
<
1
$
(
<
< "( < B+
"(
Z
'
< ! & < B+?
'
'
0
X
'
< 0
< " < =
B
Z
<
</
< +;
B
$
'
$
# >
/
#> (
< "( < ;+
:
Z X >
"
"(
<
+;
@
Z
A
R =
<
<
>&
"
#
>
,
9@<
'
9@ ]
4< '>
> 504< *
>
% " )<
??
'0"[
_'
6
> 6
" +
#
<
<
=+@
$
0
" C
1
/
9
_5
1(
*
+ >
1 , - 4<
"1 /
# >
'
2 "
>
9
$
3"
$
< @+
>
$
<
=
'
1
$
"
$
3 C
\
9B
< ! ( !0!3$ #< '>
-$
,$
&%
/
$
"Z '9
'- "
&
#$
&
%
&
#
%.
!
$;& 7!# !
"
"!
& &%
%
/%
2 .$
"$ F< 0
1 /0 ! "< '>
> 504 5 >
$
31 E0$
3 < *
+,
-$
,$ < *
> 03"$
3< 0
" 3#$ 4"< 4 "0 ,
1
"
$# '
$# '
<
$
'## 9
'-
$
' 8
1"19
1 "
6
&
+
#
"
$
5 '9
+,
1$ $
!
, #
6'!
1"
$#
$#
.$
,$
# #$
03
.
R5
"
_
, '< 0 '#0.
C
_
6
,$
'0#
@
^
<
'-
$
"#0 -
504
-
H
>
H
"
5
,$
03 >1
%
%
&; &
&
(
E
/
>
#
,$
03
3
(
E
,$
03
9
'-
'- "
(%3# )
/
>
#
5
"
" 1
9
>
5 X
3 E$
3
/
1
5
1 "
&%
A
3
1$ $
!
"%
%
&; & +
9
/
.$
,$
# #$
03
B
"
(%3# )
" 1
9>
/
B+
H(
>
!
A
3 Z'05``
#
2 92
3 C X
7
2
Z
92
$
'#
A
'05
``
B&
+
B
a+ >
$
$
= (%
"'
+ +
A
:+
=
)
(R >
$
"#
:&
B
=
R ;
"
(
, '
+
3 Z'05
3 C X
@+
2
Z
92
B
$
"#
'05
:&
"'
a
+ +
A
;+
,
(
$
"#
@
"
A
C
C
" '
,
A
(
B&
+
?
"
"
, '
, '< 0 '#0.
?
'-
504
$
"#0 -
-
H*
50 '05
>
&
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
&!0
5
0
+
'0
51
"0'7"0 '
3#,
B+
?
B
A
$
1 05
O
$
$
8
2
6
2
3 #
:+
B
A
="
O
6
3 #7 3#,
=+
@
>
$
7
8
2
"( &
@
H,
,54
#
5
>
5
>
#.
&!0
+
b
.
>
+
A
2
?%
O
HA
1! )
>
1
$
=
"
+
9
>
>
1
5
2
2
'
H6
8
, '
L
!;
%" "
7
+
+1"
1'71 '
!"5< 1! <
81 %
#
+
5#" /11< .
#
>
)
=
A
5=
5=
% 2+5
< T(
;+
>
"
+'
@
A
/ 2Z'15
>M
1" ':
&
%
< ':
??? +
"
, '< 0 '#0.
7
=< ':; :
!3 )
5#" /11
)
'-
504
$
"#0 -
-
I
!"
I
cd "
'>
&
1< '>
1
_<
@A
> 504< * 6
>
#
> 504< 5 X Y
(
"(
#< *
+,
$
-$
,$
_< '>
# >
"
'-
'- "
B
&%
, !-0 Z 4 ,< '>
/
#
0
"1
6
_< ," E$
"
$
A
cd , U !01
>
6
c
=d '>
1
*
'!
1$
!
! ,<
+
1 "
!' &.
0 F ,< ! (!0!3$ #< *
+ >
> 504< _/
511 '
#
2
X" >
(
(
511< $
(
<"
:<
+ =
#
1$ $
!
+7!
&
I(
cd "
1
5
"
.$
,$
# #$
03
> 504< 4
0
'
,$
'0#< _
,0
#< _ 3
!/%/ )
/
2
5
5
<"
, #< _- >+,
5
+
R"
+5
'
> 1
'
1
1
+/
"
C
_<
>
"
$
=
c
;d E
01 $
!
+
*
Z
V< '>
> 504< * 6
C
_< Z (Z
'
$
'
<" C
<
B<
;= R @:
<"
"(
c
@d 5 X Y
, #< /
. 3< *
1 /0 ! "< 4 "0 ,< _
"(
5 !+; "
0
"
R
T C
> eE
@@7 " 7 :7:;=:?
c
:d "
'
>
[
L <
, #< '>
"1
B<
B< A
I*
'
cd '>
3
'
> 504<
6
9 "
3
,
cd '>
3
'
> 504<
6
9 "
3
c
=d '>
> 504<
,$
'0#< _$
>
5
1
'0
< _1
D
"
6
'
'0
< _1
D
"
6
'
+5
.
"(
_<
#< _ / /
/1
1
9 /
_< f/
B ?@<
,
"
B
;;?
1
9 /
_< f/
$
"<
D
"
'
+/
D 3 V 3< '>
> 504< 0
(
5
5#" '
, X_<
*
1
5 >
1" "(
<
:%
:)<
$
1 :;=:?< =
+
> 504< >1 < ,
Z
'
;<
;;+@
T0 3#0
X P #>
+
, '< 0 '#0.
2
B ?:
.$
"$ F< _ 6
_< f
= ? ??? @
+
'-
$
"#0 -
504
-
I,
cd
'!
> - '- 5 3$
<*
$
_<
6
<
Icd
+
#
cd "
#
5
+"
$
5 'SB<
'
6
! ,< '>
>
>&
<
)
,$
'0#< 1
L
</
%
!
c
;d '>
"Z
1 C
=%
> 504<
>
_< $
cd "
#
* 6
'>
cd
.
$
c
=d *
',
#
"
5
2
/ 2
_< $
>
@<
.$
"$ F< 5 X Y
, #< _-Z
(" C
Z X > <.
< " < \
&
5 - "-< '>
>
> 0
(
&%
&' " &"
> 504<
L
"1
> $
E$
301<
,$
'0#< '>
> 504< _#> _"
_<
"$ < #> FF$
F!
< '>
% " )<
#$
3<
6
> 504< *
3 /T- < *
,$
'0#< _ >
C
3 /T- < '>
,$
'0#< ] / !
3 '
>
6
< Z" S <
1 ,0 5 <
"
< =+
,$
'0#< _#C "
+!
( 2
$
5
> 504< ^ >
^< ""'$ '
@< R :
'
'
)
>
"
'- "
' #
(
,$
'0#< _
6
;=g;B< T
" ! $ < '>
, '< 0 '#0.
>
> 504< /
^< 3 Z' "<
E0*#$
" T< >
"0 ,
1< '>
E$
1 ,< *
+'>
> , , 33< _
(_< 5
# Z X >
1 # '
1< '>
'-
"
>
5 - "-<
E$
301< '>
> 504< * 6
2 (
/
. X
>
"
<_ ' C '
S < @+ B
<"
% > '
0
Z
3 C X
'05
)
T0 1 $
<1
'- 5
<*
1
5 >
c
:d , U !01
>
Z X >
1 "
> 504< _ 3 C "
_<
"$ '0SB<
L <
'0
)
6
+
#
"
" $ # %"!
1 ,0 5 < * O 5
5$
14<
,
4< *
>
<5
'
_ , C'
'
'
c
Bd
> 504< *
1
B%
6
&%
,$
'0#< '>
#
$
!$ $
< '>
"1
(
"
+"
> - '- 5 3$
<*
_
'
> 504< _, .
B
&
! ,<
>
,$
'0#< _ !
>
L _<
"$ < #> FF$
F!
"
< '>
% " )<
c
;d
c
@d
,$
'0#< '>
@
@?<
T0 3#0
$
"<
0
+#> +
1(
Z'03S =< " C
Z
'03
)
I6
1$ $
!
&%
"
> - '- 5 3$
<* 6
"
'
B%
"
B
c
=d *
&
.$
,$
# #$
03
+ @A
,$
'0#<
"
> 504< * Y
5
#
' +
<5
>< ; 5
>
>
_< @ >T
><
>
<
> 504< * 6
(
_< @ >
'-
$
"#0 -
T
504
> Z
<
cd "
'>
X >
"
C
< Z" S <
0 F ,< ! (!0!3$ #< *
> 504< _"(
,
1
*
+, +
</
<
"(
)
c
?d E
01 $
!
V< '>
+
$
5 'S:< X<
'
)
.$
,$
# #$
03
1$ $
!
=@+ ; < T
+ >
1$
!
#< *
+,
C > 5,
+ <
: %$
"(
> 504< *
6
: %$
,$
'0#< _
C X > ( +
$
5
01 $
!
+"
< Z" S:<
c d'>
<
> 504<
2
'
'
BB@+BB? %$
c@d 5 X Y
, #< 4
T0 3#0
$
"< '>
#$':2
"$ '0 F$
"
$
, 5 3 <*
> 504< _"( 1
+#
"
'0
c:d
$
"< '>
> Z
<
+
cBd
T0 3#0
_< "
T
> ><
T0 3#0
_
5#"< !"5<
<
<
$
"< '>
'
"
1!
.
#
?:+
%$
c d'>
_1
> 504<
$
/$
> "
C
%$
"(
c?d
T0 3#0
S:< +
0
+Z
_<
.
<"
@<
)
. 3< *
+/
D 3 V 3< '>
> 504< 0
T
/
1
0
_<
"$ '0S@<
( %# 6 ()< "
@
'0
)
c;d*
.
T 3"$ T< 1
, 3V< #>
Z$ .T < "
# / V0,,$
<
2 .$
"$ F< '>
> 504<
- ,$
5$
'< '
10,Z$
3< _5
'
#
_< #> T
> Z X >
@?+:=< T
> ><
< B+
;
(
T
C '
V< 1
!$ $
< _0
" C +1
+ :< T
><
.$
"$ F< "
>
[ , #< _
"(
_< $
5 'S@< .
$
5
'
c=d 5 X Y
, #< /
1 /0 ! "< _"( 1 2 2
>
%
"
$
:<
>
_<
c d '>
1
; >T
5
'- "
-$
,$
>_< $ "9
$
,$
'0#< _/
'
"1 "(
_< '
'
'
,$
'0#< E
0
" C
'-
><
c d, U !01
1< '>
> 504< * 6
5
'
5
*
:< 5(X
<! & <
+ @%
Z
3 C X
'05
)
> 504< * 6
'
> Z X >
<
:
1 "
+/
D
2 2
5
'
)
3 V 3<
T
1
"
1"
>
_<
%
< =+:
> 504< _
X >
" C
" C
< Z" 9 <
> 504< ,
5.
1<
, '0
( " C
.
5
+
#
>_< E#' / S
<
' (< " < +
E >
# >
('
)
T0 3#0
$
"< ,
5.
!"5
1! 5
_< $
"#S
<# >
<$
#
)
$
"< '>
A > ! ,#'- E<
>
T0 3#0
$
"< 5
>
" C
< Z" 9;<
> 504< ,
, '< 0 '#0.
5.
1<
71
< +=
, '0
1<
, '0
?+
;<
<
<
" >
<_
=
'-
$
"#0 -
504
" C
$
"" S
$
c d'>
1
E
>
<T
"(
,
<
c d'>
"
' (<
"(
!
&
cd , U !01
1< 0
1
/
# >
'
+ >
'
.
+ >
c
@d " X
,
# >
1<
"(
/
V.
< _'
'
<*
'
9:< 0
01 $
!
+
(
:< 5(X
6
"
6
.
&
,
;
0 F ,< *
1$
3< *
+
"5 E
> < " <
.
(
C
"
_< "1
#
> 504<
, '< 0 '#0.
+
$
B<
,
4<
" C
SB< +@ A
"
#< '>
"
6
' +
'
"
B
,$
'0#< '>
> 504< *
#
"
SB< +@ A
> 504< '>
C
_< "1
:
/
,$
'0#< _
>
(
< 5(X
>
"
'
<! &
+ >
1$
!
#< 3
. ,#
<*
!0 .
1< '>
> 504< _ 5, /
"1 "(
_< "1 /
# >
'
@
"
< _/
<,
4< _'
"1 "(
B
S:< ;+ A
_<
< ;+ A
cd *
+ >
1 , - 4<
,
4< '>
> 504< * 6
_5
1(
"1
/
# >
'
9@<
> < " <
@
c
?d "
!0
, '0
" S
&% &' " &"
% ." %
%
C
,$
'0#< '>
1(
'
Z
c
Bd '>
> 504< _ Z. R T (
3 Z'05 Z X >
> $
"# 5
=!_<
2 >
4< '>
> 504< _1
"1
9B< 1
< " < @+?
> 504< *
_< $
"# 5
<! &
'- "
-(
"
@ !-L
_< $
"""# S
? <"
>$
)
%
& &
,$
'0#< 4 ,0
(
"1
% " )<
V< '>
_<
'-
@!
%$
)
5.
1< _
> > Z X >
> 504<
< " < @+?
,
>
'
1
# >
1 , - 4<* 6
/ !
_< $
"# 5
Z
<-
B< .
c
:d E
L
> Z 3!< '>
1(
(
9B< 1
3 /T- <
" ! $ <*
1 , - 4<_
_< $
"# 5
Z
'
<-
>
5.
1 "
)
" +
#
%
$&
1 , - 4<
_< "1
c
;d *
_
$
"< ,
" C
1$ $
!
!
; @+;
<
> 504< !> U , V $
3< * 6
'$
# 3 <_
1"7/T
#
, X
6
" < +;
?? <
@B +@B@ %$
"
+"
# > 6
IG
c
=d
T0 3#0
/
%
>
T0 3#0
$
"< '>
> 504< ,
( " C
"(
_< /
T
:?+B;< T
> ><
< ?+=
T (
cd *
> #
#
< 5 ( < =+ : \
"
> 504<
$
/ 5#" #
< " < @+
A
"(
c d
.$
,$
# #$
03
:
,$
'0#<
_< "1
> '
+
C X
$
5
9@<
'-
504
$
"#0 -
-
c d'>
> 504< >1 < "
>
T0 3#0
$
"< >1 < _ "1
, C
- >1
_< "1 /
;
[
C
.$
,$
# #$
03
, #<
2
$
# >
'
1$ $
!
1 "
'-
'- "
.$
"$ F< >1 <
'
"(
< >
2< " <
c d '>
> 504< 5 X Y
, #< "
0 F ,< *
+ >
1$
!
#< ! (
!0!3$ #< >
1 "/
4< 3
. ,#
<*
+
!0 .
1< 4
1 3 /< _ 5,
Z
"
"(
_< "1
/
# >
'
< >
2< " <
;
c d5 X . 03V ,< L" $
1 ,< *
.
T 3"$ T< 1
.$
"$ F< '>
> 504<
T0 3#0
$
"< 5
Z ,# < " T $
, TT #< ,
5
< #>
.
!
6
_< $
"# 5
Z
"
< ,( < A
;
c=d
T0 3#0
"
$
"< '>
> 504<
(
+ >+
3 C X
Z
C
=%
c;d'>
"
3
> 504< !> U , V $
3< *
"
1"7/T
</
< =+ B A
?? <
>"(
)
IH
cd /
?? %
30 E ,< '>
2
+
Z X >
'0"[
< '>
'
/
cd '>
"
< :+
??? %
;!
#
<*
+
< _ 5,
<
1 ,
. ,#
_< "1 /
/
)
#
<*
< _E
3
"
c
=d '
1
"
> 504< !> U , V $
3< *
1"7/T
<
</
< +;
)
1 ,
3
. ,#
ZZ /
cd
&
#
'>
E$
0#< _$
_< Z5 S
? <
)
6
'$
# 3 <_
> #
'>
??B %"
# &. .7!
(+.
/ !
(
< ;+ @
"$ #
-(
_< *
+ !$
&
!0 .
1< ! (!0!3$ #< '>
C
C
; %Z
Z
>/
)
+
!0 .
1< ! (!0!3$ #< '>
"(
' >
(
( >
<5 (
<
<
; %"
> 6
/
#C ' >
_< $ "S
? <
&% &' "&"
1
>
"
<
$
&%1 "& &"/
&
<! &
)
> 504< !> U , V $
3< * 6
'$
# 3 < _5
1
#
> 6
; @ !-L
_< *35S
??<
L&
*
3
5
+
)
I 4
cd
3 <_
> #
?B %
> 504< 0
'5 (
5
+
#
%"!
.$
"$ F <_
2
_< 3Z$ < 5(X
$
>
'$
#
<
./
II
cd
6
2
, 3V<
2
- ,$
5$
'< "
< _/
'
"1
/
> <
"
"
&!0
> 504<
_
> 504<
1
C
1
< _"
C
+
_
, '< 0 '#0.
@
'-
$
"#0 -
504
-
I
cd '>
"
#
#>&
#$
"
> 504< _'
-(
1"7/> 6 +5
1
<$
3"
cd '>
2
&
c
?d 5
.
X
c d,
'
6
>
(
B
"$ 1
5 >
1
2
>
> 1
-Z
1 B< _1 1 B R
_
-Z
<"
2
<$
$
$
7Z @71@ 7 :
>
> 1
> T ('
:
1
C
X
>
$
_< A
; ?< _1 ; ? R /
:
"Z 5
-Z7"Z
1
:
_<
/
>
"
B<
.
C
7
C
>
\
:
(
_<
\
:
3 Z'05 Z 1 @< _1 1 @ R
L
C X_<
@
c;d,
:
>
_<
<1
+$
$R 1= _$
(" >
c=d ,
$
<
" >
+$
$R 1@ ;< _#
X
_< A
+$
$R 1@ < _ 6
_< $
"#+
@+ BB ;7
c d,
>
R"
+$
$R 1@ < _1
_<
c d3 Z'05
_<
c@d ,
< _1 ;
B
3 Z'05 Z
1
>
/
>
+$
$R 1@ =< _
"Z 5
??@
B
;
cd ,
< :A
1
_<
c
;d 3 Z'05
_<
c
Bd ,
/ 2
; @ !-L
_<
'
<$
3"
6
'
; ;< _1 ; ; + "
c
:d ,
6
2
2
S
_
c
=d 3 Z'05
B
C X
'- "
???
5
1
(
>
3
1
'
A
+$
$R 1= = _
c
@d ,
'-
;!
&!0
#%' &;%
"%
%
&; & + !
#
+$
$R 1B
< Z>
.
cd ,
#
,
<
1 "
#!
4
cd ,
5
"
1$ $
!
"
S "( &
#(
T
> 504< _, "( &
6 _< 5
4
.$
,$
# #$
03
+ (
3 Z'05 Z
_<
1 ;< _1 1 ; R
@
/ 2
1
3 Z'05 Z
> 1
_<
1 =< _1 1 = R
@
!
/
"
, '< 0 '#0.
2
C X
1"
'-
$
"#0 -
504
-
4(
>
$
1 05
cd ,
$
1 05
< _' >
c
=d ,
A
$
1 05
:
<_
A
3 #
=" R _
c
@d
A
3 #
=" R _1
c
:d
A
3 #
=" R _5
c
Bd
A
3 #
=" R _
4*
cd '>
81<
c
=d
c
;d '>
#>
9
6
_
& _
_
>
!;
" "
%" "
! ;!
_<
5
,
> 504<
#
81<
</
<@A
#
>
(
>
</
'0
< '>
<5 >
(
81
>
'0
<_
> 504< _
$
5
5
>
</
'0
<_
81
&
,$
'0#< '>
2
6 3f '
5
+/
#
81<
< ?
R
>
"
#
</
>
_<
,
81<
<
_<
5
,
> $
5
R
:
_<
S
> 504< _
81
c
Bd
_ 504< _/
>
4,
'- "
S _
> 504<
c
@d '>
c
:d *
>
&
!
(
>
'-
_<
T0 3#0
$
"<
R
$
$
>
#
81<
</

.$
,$
# #$
03
&
"
5
$
+
_<
>
R
(
>
5
< ?
$
81
_<
"
> 504< _,
+h
'
_< '
9
:=
?B<
> 1
cd
S
>&
R
c
?d
S
>&
R
"
# f?@ =@
??:<
??:<
, '< 0 '#0.
> 1
> 1
# f?@ =@
# f?@ =@
B
'-
$
"#0 -
504
-
"
, '< 0 '#0.
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
'-
$
"#0 -
504
-
#$
#
"
E
F1
, '< 0 '#0.
.$
,$
# #$
03
1$ $
!
'-
1 "
'-
'- "
'-
?
'-
=
$
"#0 -
504
-
"
, '< 0 '#0.
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
'-
$
"#0 -
504
-
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
!
"
, '< 0 '#0.
=
'-
$
"#0 -
504
-
$
.$
,$
# #$
03
1$ $
!
1 "
2
<
6
9
<
9
'
&
< 6
6
(
2
< A
#
9
<
4
,
<
>
<A
9
*9 &
A
5
>
T0 3#0
2
*
6
< 6
<
A
,$
'0#
>
"
6
9
9
>
> $
A
&
(
9
L!> U , V $
3 6
*
6
2
>
>
9+ +
- ,,0'0 6
9
& O
> 2
9
<
A
> >
; 9
6
,
$
"
>&
>
<
>
<
<
A
$
9
9
2
*
<
A
9
>
9
>
A
96
,9
<
2
A9
>
<
9
*9
\
> >
9
6 &
(
9
2
26
A
=
6
<
6
, '
< A
9
@
9
2
<
<
9
>
6
"
'- "
" $
$ (
>&
>
'-
"
, '< 0 '#0.
2
>
&
=
"'
96
9
K
*
6
+
6
"
9
<
6
'-
$
"#0 -
504
-
(
G
c
=d
c
;dH<
6
6 6
> ><
> >
2
9
6
A9
>
&
<
> >
<
1 "
'-
'- "
'
9
<
,
2 %
1895
1901
1947
1948
1970s
1980s
1991
1995
1995
1996
1996
1997
1997
1998
1999
1999
2000
2001
2002
2003
2003
2004
2004
2005
2005
2006
2007
2008
<
cd
cd
> >
2
6
6 6
6 9
>
9
+
< A9
/
6
<
) .!
1$ $
!
!"
'
,
2
.$
,$
# #$
03
K
< A9
>
)
&
6
9
(
K
6
6
6
- MARCONI transmet des signaux radio sur 2,4 km.
- Première liaison radio transatlantique par MARCONI.
- Invention du transistor par BRATTAIN, BARDEEN, SHOCKLEY.
- Théorie de l’information par SHANNON. Fondamentaux du numérique.
- Programme ICNIA de l’US Air Force : radio embarquée sur la bande 30-1600 MHz (abouti en 92).
- Premiers systèmes radios à µ-proc. pour contrôler les circuits analogiques de réglage de fréquence.
- Introduction des premières radios VHF multi-modes dans l’armée américaine.
- Premiers systèmes radios à base de DSP pour le contrôle du modem en temps-réel.
- Amélioration des convertisseurs analogiques numériques.
- Tactical Anti-Jam Programmable Signal Processor (TAJPSP) pour l’opération simultanée de formes
d’onde utilisant une approche modulaire. Ancêtre du programme SPEAKeasy.
- Première utilisation de l’expression « software radio » par Joe MITOLA.
- Premier IEEE Communications Magazine Special Issue on Software Radio.
- Première démonstration – SPEAKeasy US DoD.
- sujet de thèse orienté conception d’ASIC pour le canal de diffusion troposphérique
- Création du MMITS Forum qui deviendra le SDR Forum en 1999.
- Premier groupe d’étude sur la Radio Logicielle au Japon.
- Ré-orientation du sujet de thèse vers une implantation dans un FPGA
- La Commission Européenne sponsorise un Software Radio Workshop à Bruxelles.
- SPEAKeasy II démonstration d’un système multi-mode multi-bande.
- Investigations sur les plates-formes Mercury à base de PowerPC, mais trop tardives par rapport à
l’avancement de la thèse
- L’US Navy se dote de la Digital Modular Radio (SDR de 2 MHz à 2 GHz).
- Initiation du JTRS (Joint Tactical Radio System) par le DoD.
- Première utilisation de l’expression « cognitive radio » par Joe MITOLA.
- Adoption du modèle de démonstration multi-mode multi-bande conjoint entre la France et l’Allemagne.
- Deux numéros IEEE Communications Magazine, un IEEE Personal Communications magazine, un
IEEE Selected Areas in communications sur la radio logicielle.
- début 99 : Création de l’activité Software Radio de Mitsubishi Electric ITE-TCL
- mi-99 : acquisition d’une plate-forme Tx/Rx et premiers développements multi-standards
- septembre 99 : représentant de Mitsubishi Electric au SDR Forum
- démonstrations multi-standard en FI pour la partie modulation de la couche physique : UMTS-FDD,
GSM, EDGE, BlueTooth
- début d’approche de conception multi-processeur hétérogène de haut-niveau (de type SynDEx)
- Premier « Report and Order » de la FCC sur la SDR.
- le CRC (Canada) développe la première version open-source en Java du SCA.
- deux numéros spéciaux de la revue des « Annales des Télécommunications »
- première démonstration temps-réel de reconfiguration par téléchargement (OTAR en FI) d’organes
de la couche physique (modulation EDGE – application vidéo)
- présentation du système de la démo dans les locaux de VANU Inc. à Boston
- projet RNRT A3S : approche de conception de haut-niveau (de type UML)
- La FCC définit avec le SDR Forum la Cognitive Radio et la situe par rapport à la Software Radio.
- Le projet E²R est initié par la Commission Européenne.
- Première certification d’un produit commercial SDR de Vanu Inc (USA)
- participation au projet E²R
- travaux sur SDR et UWB
- Le ministère de l’intérieur et des communications du Japon change la loi sur la radio pour permettre
l’introduction de produits SDR dans la bande des 5 GHz.
- équipe SUPELEC SCEE / IETR
- participation à NEWCOM Project D et Department 4
- participation au projet E²R phase II
- participation au projet IDROMel - RNRT
- participation au projet MOPCOM – RNTL
- SUPELEC est l’une des 7 universités internationales à participer au Smart Radio Challenge organisé
par le SDR Forum : équipe MENHIR (Modem for ENHanced Interoperable Radio) dont je suis le tuteur.
- éditeur du numéro spécial sur la radio intelligente de la revue « Annales des Télécommunications »
7!
%
& &
"
% ." %
%
, '< 0 '#0.
%
& &
%
%.
K
-L
K
GL
==
'-
504
$
"#0 -
'
-
2
>
>
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
>&
/
K
5 >
!
9 >
#>&
5 X Y
, #
#>&
*
+ >
1 , - 4
+
#>& "
#
! ,
#>&
, U
!01
1
+
#>&
>
- '- 5 3$
) .!
(<
$
,
>
&'&!0
>
/
9
"&
&
<
2&
,
&
>
2
9
>&
:
2&
E
5 X Y
>
>&
<
01 $
!
2
"
V6
6
9
>
#
, # 6
A9
5
>&
<
+
@
<
>&
>
>
*
+ >
2 6
J
:<
1 , - 4
>
K
>&
2
1
>
1
! ,
&
9
>
,
<
6
2
&
>&
2
, U !01
9 >
69
9 >
1 6
A9
*
2
+ >
'
1 , - 4
(
9
<
9
( &
<
69
6 9
>
>&
9
>
I
=;
#
"
, '< 0 '#0.
6
> - '- 5 3$
9
96
6
2
>
<
'-
504
$
"#0 -
*9
6
>
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
>
<
2
<
1
<
A
>
>
6
'9
<
9
A9
> 2
O
O
6
A
6
69
L
><
96
%
6
>
K <
< A9
2
2
6
< 6
<
96
<
(
( &
) #
2
2
9
A
9
A
6
1
9
<
(
*
&
%&
*
"
&
9
6
% ." %
%
6
69
9
,
92
6
K
<
9
6
6
6
69
K
,9
6
> 6
9
69
(
(
<
<
<
"
<
"$
'
#
2
>
>
K
9
>
>
%
"
)<
9
"
6
"
1
6
< A9
<
<
2 6 A
,9
<
2
9
6
6
< A
)
"
A
cd
cd
c
@d $
&
2
9
9
6
L
6
A9
2
>
2
9
>
2
(
2
2
6
>
6
A
O
( &
9
>
&
2
6
> 2
5
&
>
< 9
(
>
<
9
c
Bd'
1" < / ! <
9
$ 9
6
c
:d
c ( dc (=d
,
c (d
,
>
6
%
9
9
2
6
9
( &
<A
6
6
>
6
<
>
"
%
, '< 0 '#0.
+
&
<
>
5,) c (;d
c (@d 3
=@
'-
504
$
"#0 -
c d
2
2
2
1
.$
,$
# #$
03
1$ $
!
+
< A
69
1 "
cd
9
6
6
9
<
,
6
&
9
,
$
)
0
)
1 1 2
*# " c d 5
#
<
&
"'
"'
9
69
O 9
)
>
<
"
+
9
2
2
"'
>
c d
<
A9
"(
6
&
5
>
A c (:d
<
9
)
9#"$
c?d
9
A
<
c
;d
5
>
A
/''
'
c d
<
<
O
"''; c;d
B<
>
>
)
2
2
2
>&
9
$
6
Z!;
"
2
"
, '< 0 '#0.
2
<
A
#E 1
69
=:
<
%
<
?
9#"$c?d
c
=d
<
9
9
<
2
9
Z!
9
c d
<
c=d
A
2
6
<
69
9 &
$
6
6
< 6
(
/'' c d6
,
> > 19
A
&
A
9
%
%.
!
>
6
6
A
,
>
c:d
<
"%
A
A
'0 .
2
*(
69
'0 .
2 6
9
9 >
'9
6
c=d
c;d
<
c@d E
# >
!3
cBd /
2
9
1 1 $
6
9
2
>
9
#
6
<
A
$ 9
(
>
1 1 % " 1
%" C
'
6
$ 9
%
2 K
&6
%
"'
c
?d
>
> ,9
cd
>
<
>
6
L2
2
)
6
2
2
1
'- "
*92
(6
'
'-
'-
504
$
"#0 -
6
*
9
9
-
$ 9
(
.$
,$
# #$
03
1$ $
!
2
&
6
6
2
Z!
2
"
! %
& &
,
&
>
5
*9
9
>
"
2
,
>
6
> >
% ." %
%
6
A
9
9
>
> 2 '
2 2
(
6
2
< ( 6
9
9
'9
2
> 2
( &
6
9
'- "
9
1
9#"$6
[
A
( &
< A
6
,
#
%.
% ." %
%
" "
,
>
L
>
2
( &
,
,
&
+
&
6
6
6
!
%
&
( &
6 <
6
%1" < !
6
<
< i')<
>6
,
&
9 >
+
+
<
K <
/ !
>
9"$
'
>
&
<
9
+
+
+
+
+
3
2
<
+
<
6 <
+
&
>
<
>
(
2 ,
&
<
6 9
>
<
<
&
6
\N '
A
9
9
2
6
A
K
A9
>
5 X Y
+ +
9
2
>&
'-
9
6
<
9
K
,
1 "
2
A
9
K
A
9
A
???<
9
>
6
&
9
A c
?d 9
,9
$ # 7$
3"
, #
9
'9
6
A
5
,
>
>
6
"
, '< 0 '#0.
=B
'-
$
"#0 -
504
-
.$
,$
# #$
03
1$ $
!
1 "
'-
9
&
2
9A
'
9
9 A
9
9
O
>
/ !
1
! ," %$
>
6
(
'
)
2
%
<
A
2
&
9
/ ! <
9
6
K
6
>
9
>
<
>
6
9
+ +
J
<
9
+
<
<
9
92
<
6
9
'
K
/ ! )<
&
%
<
9
<
&
&
%
>
(>
9
K
c (Bd '
)
6 6
%
&
'
6
&
>
<
&6
>
'- "
M
6
+
!
) .!
* M )%
,9
6
cd
c@d "( 1 2
2 (
<
> 9 >
"( 1 2
&
2
>
6 <
2
K
=
"
"
#&%
! %
& &
J
D
% ." %
%
>
"( 1 2
<
9
9
+
6 < 9 + +
J
>6
"
, '< 0 '#0.
9
$
3 $
>
<
6
9 >
I
J
+
9
'> 6
!3
<
'-
504
$
"#0 -
-
5;<
.$
,$
# #$
03
1$ $
!
1 "
'-
>&6
'
9
9
+
+
6
>
'- "
+
2
<
2
1
6 <
<
>
&
9
6
,
>
9
,9
"$ c ( d
>
2
A
>
+
6
c (?d 6
9
&
FIFO
TCP
FIFO
5 X Y
, #
K
2
1
c:d Q
6
2
) " >&
2
9
"
>
! %
!
Vitec
!
$
%
&
$&.
9
#
&
>
9
"( 1 2
;&
9
%
6
X
"
)
1"
"( 1 2 c
?d
<
3
9
6
6
6
5 X Y
, #
"( 1 2 ,
>
6
, #
2
>
%#$ ':2
6
69
5
>
"( 1 2<
5 X Y
9
>
2
J
+
<
6
9
9
6
'
>
2
"
>
"
6 6
6
, '
9
A
9
'>
"
+
>&6
+
6
1
% ." %
%
<+ $
&
"( 1 2
9
>
'
>
"( 1 2 ,
PCI BUS
Pentek
&
9
9
PC
+
DSPs
TCP
Sundance
BIFO
Pentek
Vitec
%"&
MPEG-4
Decoder
FIFO
FPGAs
+
DSPs
SDB
Sundance
PCI BUS
;
UMTS
Demodulation
FPGAs
+
DSPs
PC
'
9
6
/
UMTS
Modulation
MPEG-4
Coder
,M
=
9
2
K
) .!
/
+
<
, '
"1
/
,
6
"
2
<
, '< 0 '#0.
E
#
96
>
"
"
, '
2
=?
'-
504
$
"#0 -
-
"( 1 2<
=" %
>
A
2
>
>
6
.$
,$
# #$
03
6
>
1 "
/
>
6
A
3 #
,
K
A
"( 1 2
"( 1 2
="
5,
&
,9
> 51
3 # '
>
>
9
=<
A9
="
5,
&
&
,
(
2
<
6
="
<
>
c ( d ="
%
<
N)< &
'- "
>
'9
9
"( &
)
<
9
'-
<
2
>
1$ $
!
*
<
5$
#0,
&
@
K
'
69
<
9 A
9
> 2
+
/ ! <
(
<
9
19Q
"( 1 2
<A
>
6
A
3#, 5
6
9
<
& >
(
51 7 5, A
69
E-1,
( >
/ !
5
,9
9
A
2
>
>
+
9
$ # 7$
3" )
5 X Y
:
, # %
9
<
6
A
&
, #<
'9
69
>&
>
'
'
*
$
>
2&
#
6
>
6
>& 5 X Y
"
, '
() ' * +
%
"
&
A
*
A
, #
, %
. 3< 6
9
$ # 7$
3"
;
>
() ' *
"
'*'
9
A
'
2
5 X Y
, #
"
&
*
:- $
>
O
, '< 0 '#0.
6
>&
5 X Y
/
96
Hindawi Publishing Corporation
EURASIP Journal on Applied Signal Processing
Volume 2006, Article ID 64369, Pages 1–13
DOI 10.1155/ASP/2006/64369
Rapid Prototyping for Heterogeneous Multicomponent
Systems: An MPEG-4 Stream over a UMTS
Communication Link
M. Raulet,1, 2 F. Urban,1 J.-F. Nezan,1 C. Moy,3 O. Deforges,1 and Y. Sorel4
1 IETR/Image Group Lab, UMR CNRS 6164/INSA, 20, Avenue des Buttes de Coësmes, 35043 Rennes, France
2 Mitsubishi Electric ITE, Telecommunication Lab, 1 Allée de Beaulieu, 35 000 Rennes, France
3 IETR/Automatic & Communication Lab, UMR CNRS 6164/Supelec-SCEE Team,
Avenue de la Boulaie, BP 81127, 35511 Cesson-Sévigné, France
4 INRIA Rocquencourt, AOSTE, BP 105, 78153 Le Chesnay, France
Received 15 October 2004; Revised 24 May 2005; Accepted 21 June 2005
Future generations of mobile phones, including advanced video and digital communication layers, represent a great challenge in
terms of real-time embedded systems. Programmable multicomponent architectures can provide suitable target solutions combining flexibility and computation power. The aim of our work is to develop a fast and automatic prototyping methodology dedicated
to signal processing application implementation on parallel heterogeneous architectures, two major features required by future
systems. This paper presents the whole methodology based on the SynDEx CAD tool that directly generates a distributed implementation onto various platforms from a high-level application description, taking real-time aspects into account. It illustrates the
methodology in the context of real-time distributed executives for multilayer applications based on an MPEG-4 video codec and a
UMTS telecommunication link.
Copyright © 2006 M. Raulet et al. This is an open access article distributed under the Creative Commons Attribution License,
which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.
1. INTRODUCTION
New embedded multimedia systems, such as mobile phones,
require more and more computation power. They are increasingly complex in design and have a shorter time to
market. Computation limits of critical parts of the system
(i.e., video processing, telecommunication physical layer) are
often overcome thanks to specific circuits [1]. Nevertheless, this solution is not compatible with short time designs
or the system’s growing need for reprogramming and future capacity improvements. An alternative can be provided
by programmable software (DSP: digital signal processor,
RISC: reduced instruction set computer, CISC: complex instruction set computer) or programmable hardware (FPGA:
field programmable gate arrays) components since they are
more flexible. Efficiency loss can be counterbalanced by using multicomponent architectures to satisfy hard real-time
constraints. The parallel aspect of multicomponent architectures (programmable software and/or programmable hardware components interconnected by communication media)
and possibly its heterogeneity (different component types)
raise new problems in terms of application distribution.
Real-time executives developed for single-processor applications can hardly take advantage of multicomponent architectures: handmade data transfers and synchronizations quickly
become very complex and result in lost time and potential
deadlocks. A suitable design-process solution consists of using a rapid prototyping methodology. The ultimate objective
is then to go from a high-level description of the application
to its real-time implementation on a target architecture [2]
as automatically as possible. The aim is to avoid disruptions
in the design process from a validated system at simulation
level (monoprocessor) to its implementation on a heterogeneous multicomponent target. Performances of the process
can be evaluated by different aspects as follows:
(i) maximal independence with regard to the architecture,
(ii) possibility of handling heterogeneous multicomponent architectures,
(iii) maximal automation during the process (distribution/
scheduling, code generation, including data transfers
and synchronizations),
(iv) efficiency of the implementation both in terms of execution time and resource requirements,
(v) reduced design time,
(vi) enhanced quality and robustness of the final executive.
The methodologies generally rely on a description model,
which must match the application behavior. These applications are a mixture of transformation and reactive operators
[3]. A transformation operator is based on the data-driven
process: input data is transformed into output data. A reactive operator is one that is event-driven and has to continually react to stimuli. In practice, systems are a combination of both. Nevertheless, an important distinction can be
made between systems with deterministic scheduling whose
operators are mainly transformation-oriented, and systems
with highly dynamic behavior whose operators are mostly
reactive-oriented. For the first class of system (including signal, image, and communication applications), DFG (data
flow graphs) have proven to be an efficient representation
model. They enable automatic rapid prototyping and lead to
optimized scheduling [4].
This paper deals with a rapid prototyping methodology based on the SynDEx tool, which is suitable for
transformation-oriented systems and heterogeneous multicomponent architectures. Major contributions concern two
points as follows:
(i) method and tool, more specifically about automatic
distributed code generation from SynDEx,
(ii) a complex multilayer application including video and
digital communication layers, going from its high-level
description to its distributed and real-time implementations on heterogeneous platforms.
SynDEx automatically generates synchronized distributed
executives from both application and target architecture description models. These executives specify the inner component scheduling and global application scheduling, and are
expressed in an intermediate generic language. These executives have to be transformed to be compliant with the type
of component and communication media so that they automatically become compilable codes. In this article, we will
focus on this mechanism based on the concept of SynDEx
kernels, and detail new developed kernels enabling automatic
code generation on various multicomponent platforms.
The design and the distributed implementation of a multilayer application composed of a video (MPEG-4) and a
digital communication layer (UMTS) illustrate the methodology. An MPEG-4 coding application provides the UMTS
transceiver with a video coded bitstream, whereas the associated MPEG-4 decoder is connected to the UMTS receiver in order to display the video. The result is a complete demonstration application with automatic code generation over several kinds of processors and communication
media.
The digital communication layer under investigation is a
UMTS FDD (frequency-division duplex) uplink transceiver
[5]. UMTS is the European and Japanese selected standard
for 3G. It has already spread to many areas of the world,
but is not yet predominant. 3G should enable us to benefit
from new wireless services requiring quite a high data rate
up to 2 Mbps. Typical targeted applications go from wireless
internet to video streaming, and also include high-speed picture exchanging and of course voice.
MPEG-4 is the latest multimedia compression standard to be adopted by the moving picture experts group
(MPEG) [6]. The prototyping of MPEG-4 video codecs over
multicomponent platforms and their optimizations are studied in the IETR Image Group Laboratory. A part of the
project has already been presented in [7]. We will therefore
focus on the coupling between the UMTS and MPEG-4 subsystems rather than describe the video codec in detail.
The paper is organized as follows: Section 2 introduces
the SynDEx tool and the AAA methodology. Our contribution in terms of prototyping platforms and executive kernels is described in Section 3. The UMTS description according to the AAA methodology and its implementations are
explained in Section 4. The methodology is illustrated and
validated by the application (MPEG-4 + UMTS) described
in Section 5, which allows us to reach a new stage in the relevance of the method. Finally, conclusions and open issues
encountered during the application development are given
in Section 6.
2.
SYNDEX OVERVIEW
SynDEx1 is a free academic system-level CAD (computer-aided design) tool developed at INRIA Rocquencourt,
France. It supports the AAA methodology (adequation algorithm architecture [8, 9]) for distributed real-time processing.
2.1.
Adequation algorithm architecture
A SynDEx application (Figure 1) comprises an algorithm
graph (operations that the application has to execute), which
specifies the potential parallelism, and an architecture graph
(multicomponent [10] target, i.e., a set of interconnected
processors and specific integrated circuits), which specifies
the available parallelism. “Adequation” means efficient mapping, and consists of manually or automatically exploring the
implementation solutions with optimization heuristics [9].
These heuristics aim to minimize the total execution time
of the algorithm running on the multicomponent architecture, taking the execution time of operations and of data
transfers between operations into account. These execution
times are determined during the characterization process,
which associates a list of characteristics, such as execution
times, necessary memory, and so forth, with each (operation,
processor)/(data transfer, communication medium) pair, respectively.
An implementation consists of both performing a distribution (allocating parts of the algorithm on components)
and scheduling (giving a total order for the operations distributed onto a component) the algorithm on the architecture. Formal verifications during the adequation avoid deadlocks in the communication scheme thanks to semaphores
1
www.syndex.org
M. Raulet et al.
3
User
Architecture graph
Constraints
Algorithm graph
SynDEx
Adequation
distribution/scheduling
heuristic
Generic
synchronized distributed executives
Target 1
kernel
·
Timing graph
(predictions)
Target N
kernel
Comm M
kernel
M4
Dedicated executives for specific targets
(specific compilers/loaders)
Figure 1: SynDEx utilization global view.
inserted automatically during the real-time code generation.
Moreover, since the Synchronized Distributed EXecutives
(SynDEx) are automatically generated and safe, part of the
tests and low-level hand-coding are eliminated, decreasing
the development lifecycle.
SynDEx provides a timing graph, which includes simulation results of the distributed application and thus enables
SynDEx to be used as a virtual prototyping tool.
In the AAA methodology, an algorithm is specified as an
infinitely repeated DFG. Each edge represents a data dependence relation between vertices, which are operations; operation stands for a sequence of instructions, which starts when
all its input data is available and which produces output data
at the end of the sequence. In SynDEx, there is an additional
notion of reference. Each reference corresponds to the definition of an algorithm. The same definition may correspond to
several references to this definition. An algorithm definition
is a repeated DFG similar to those in AAA, except that vertices are references or ports so that hierarchical definitions of
an algorithm are possible.
2.2. Automatic executive generation
The aim of SynDEx is to directly achieve an optimized implementation from a description of an algorithm and an
architecture. SynDEx automatically generates a generic executive, which is independent of the processor target, into
several source files (Figure 1), one for each processor [11].
These generic executives are static and are composed of a list
of macrocalls. The M4 macroprocessor transforms this list
of macrocalls into compilable code for a specific processor
target. It replaces macrocalls by their definition given in the
corresponding executive kernel, which is dependent on a processor target and/or a communication medium. In this way,
SynDEx can be seen as an off-line static operating system that
is suitable for setting data-driven scheduling, such as signal
processing applications [12, 13].
SynDEx kernels are available for several processors, such
as the TI2 TMS320C6x (C62x, C64x) and the Virtex FPGA
families, and for several communication media such as links
SDBs (Sundance digital buses-Sundance high-speed FIFOs),
CPs (comports-Sundance FIFOs), BIFOs (BI-FIFOs-Pentek
FIFOs), PCI bus, and TCP bus presented in the following section.
2.3.
Design process
Our previous prototyping process integrated AVS3 (advanced visual systems) as a front-end [14] for functional
checking. AVS is a software designed for DFG description
and simulation. The application was constructed by inserting
existing modules or user modules into the AVS workspace,
and by linking their inputs and outputs. The validated DFG
was next converted into a new DFG by a translator to be compliant with SynDEx algorithm input. The main advantage
was the automatic visualization of intermediate and resulting
images at the input and output of each module. This characteristic enables the image processing designer to check and
validate the functionality of the application with AVS before
the step of the implementation.
Although SynDEx is basically a CAD tool for distribution/scheduling and code generation, here we demonstrate
that SynDEx can also be directly used as the front-end of
the process for functional checking (as it is possibly done
with AVS). This is made possible thanks to our kernels presented in Section 3. The design process is now based on a single tool and is therefore simpler and more efficient. SynDEx
therefore enables full rapid prototyping from the application
description (DFG) to final multiprocessor implementation
(Figure 2) in the following three steps:
2
3
Texas instrument.
www.avs.com
4
3.
Step 1
Sequential executive (PC) target Functional
checking
visual C++ application
Step 2
SynDEX
Sequential executive (PC)
with chrono. primitives
visual C++ application
Sequential executive (DSP)
with chrono. primitives
code composer application
Nodes
timing
estimation
Step 3
Distributed executive
(PC + DSPs)
As described above, the SynDEx generic executive will be
translated into a compilable language. The translation of
SynDEx macros into the target language is contained in library files (also called kernels). The final executive for a
processor is static and composed of one computation sequence and one communication sequence for each medium
connected to this processor. Multicomponent platform manufacturers must insert additional digital resources between
processors to make communication possible. Thus, SynDEx
kernels depend on specific platforms.
3.1.
Parallel
application
Figure 2: SynDEx utilization global view.
Step 1. The user creates the application DFG using SynDEx.
Automatic code generation provides a standard C code for a
single host computer (PC) implementation (SynDEx PC kernel). In this way, the user can design and check each C function associated with each vertex of its DFG, and can check the
functionalities of the complete application with any standard
compilation tools. With automatic code generation, visualization primitives or binary error rate computation can be
used for easy functional checking of algorithms. The user can
easily check his or her own DFG on a cluster of PCs interconnected by TCP buses. With this cluster, the user can emulate
his or her embedded platform thanks to SynDEx distributed
scheduling.
Step 2. The developed DFG is then used for automatic prototyping on monoprocessor targets so that chronometric reports are automatically inserted by the SynDEx code generator. Each duration associated with each function (i.e., vertex)
executed on each processor of the architecture graph is automatically estimated using dedicated temporal primitives.
Step 3. The user can easily use these durations to characterize the algorithm graph by entering these values in SynDEx.
Then, SynDEx tool executes an adequation (optimized distribution/scheduling) and generates a real-time distributed and
optimized executive according to the target platform. Several
platform configurations can be simulated (processor type,
their number, and also different media connections).
The main advantage of this prototyping process is its
simplicity because most of the tasks performed by the user
concern the description of an application and a compiling
environment. Only a limited knowledge of SynDEx and compilers is required. All complex tasks (adequation, synchronization, data transfers, and chronometric reports) are executed automatically, thus reducing the “time to market.” The
user can rapidly explore several design alternatives by modifying the architecture graph or adding constraints.
SYNDEX EXECUTIVE KERNELS
Development platforms
Different hardware providers (Sundance, Pentek) were chosen to validate automatic executive generation. Many component and intercomponent communication links are used
in their platforms, ensuring accuracy and the generic aspect
of the approach. The use of several hardware architectures
guarantees generic kernel developments.
Sundance4 platform: A typical Sundance device is made
up of a host PC with one or more motherboards, each supporting one or more TIMs (Texas instrument module). A
TIM is a basic building block from which you build your system. It contains one processing element, which is not necessarily a DSP, but an Input/Output device, or an FPGA. A
TIM also provides mechanisms to transfer data from module to module. These mechanisms, such as SDBs (200 MB/s),
CPs (20 MB/s), or a global bus (to access a PCI bus up to
40 MB/s), are implemented on the TIMs using FPGAs.
The SMT320 motherboard (Figure 3) is modular, flexible, and scalable. Up to four different modules can be
plugged into the SMT320 and connected using CP or SDB
cables. The SMT361 TIM with a TMS320C6416 (400 Mhz)
is very suitable for image processing solutions as the
TMS320C64xx has special functions for handling graphics. The SMT319 TIM is a framegrabber, which includes a
TMS320C6414 and two nonprogrammable devices: a BT829
PAL to YUV encoder, and a BT864 YUV to PAL decoder.
These two devices are connected to the TMS320C6414 DSP
thanks to two FIFOs, which are equivalent to SDBs with the
same data rate. An SMT358 is composed of a programmable
Virtex FPGA (XCV600) which integrates specific communication links and specific IP blocks (computation).
Pentek5 platform. The Pentek p4292 platform (Figure 4)
is made up of four TMS320C6203 DSPs. Each DSP has three
communication links: two bidirectional (300 Mhz) interDSP links and one for the Input/Output interface. The four
DSPs are already connected to each other in a ring structure. Some daughterboards may be added to the p4292
thanks to the VIM (velocity interface mezzanine) bus, such as
analog-to-digital converters (ADC p6216), digital-to-analog
converters (DAC p6229), or FPGAs (XC2V3000, XC2Vx Virtex2 family).
4
5
http://sundance.com/
http://www.pentek.com/
M. Raulet et al.
5
PC (pentium)
Personal computer
PCI
Embedded motherboard: SMT320
DSP2 (TMS320C6416)
SDBa
SDBb
CP0
CP1
PCI
PCI (Bus)PCI
SMT361
Bus 6 (CP)
FPGA1 (Virtex)
SDBa
SDBb
CP0
CP1
CP2
CP3
Bus 3 (SDB)
SMT358
Bus 1 (SDB)
DSPC3 (TMS320C6414)
SDBa
SDBb
VID in
VID out
In (VID in)
Out (VID out)
PAL to YUV (BT829)
VID in
YUV to PAL (BT864a)
VID out
SMT319
Figure 3: Example of Sundance architecture topology.
This stand-alone Pentek platform is connected to an Ethernet network. This allows TCP/IP (1.5 MB/s) communications between DSPs and any computer in the network in
order to check a binary error rate, or to visualize a decoded
image. However, the throughput of this bus does not allow
the transfer of uncompressed data.
3.2. Software component kernels
Most of the kernels are developed in C language so that they can
be reused for any C software programmable device. These
kernels are similar for the host computer (PC) and the embedded processors (DSPs). The generated executive is composed of a sequential list of function calls (one for each
DFG operation). This kind of executive and the fact that the
adapted C compiler for DSPs has really improved in terms of
resource use mean that the gap between an executive written in C and an executive written in an assembly language
is narrow. The user can design each function associated with
each vertex of its DFG in C or assembly language for better
results [15].
SynDEx creates a macrocode made of several interleaved
schedulers: one for computation and the others for communications allowing parallelism of those actions. We have chosen to use multichannel enhanced DMA (direct memory access) transfers, thus maximizing parallelism and timing performance. Data transfers are executed in parallel with computation minimizing communication duration. DMA and
CPU have their own bus to access the internal memory,
therefore bus conflicts only appear when CPU and DMA
access an external memory. As all data buffers are in internal memory, memory bus conflicts are null between CPU
and DMA accesses. Communication overhead is only due to
DMA setup which is negligible to take transfers into account
(a few assembly instructions) [16].
The development of an application on TI processors
can be hand-coded with TI RTOS (real-time operating system) called DSP/BIOS [17]. DSP/BIOS is well-suited for
multithread monoprocessor applications. Several processors
must be connected to improve computational performances
and reach real-time performances. In this case, the multithread multiprocessor 3L diamond6 RTOS is more appropriate for this situation than DSP/BIOS. Applications are
built as a collection of intercommunicating tasks. The mapping and scheduling of each task are chosen manually. Then
data transfers and synchronizations are implemented by the
RTOS using precompiled libraries. 3L makes multiprocessor application development easier, faster, and suited to dynamic communications between tasks. Data transfers are realized using DMA, but without any computation parallelism
which is nearly equivalent to polling technique.
Data transfers in a signal processing application are generally statically defined both in terms of data type and
number so that their description with a DFG is suitable.
The execution of DFG operations is also well defined so that
6
http://www.3l.com
6
PC (Pentium)
TCP
Personal computer
ADC
daughterboard
ADC 1 (DAC)
VIM
TCP (TCP)
VIM 1 (BIFO)
DSP-A (TMS320C6203)
XX
YY
IO
TCP
DSP-C (TMS320C6203)
XX
YY
IO
TCP
P4292 motherboard
DSP-B (TMS320C6203)
XX
YY
IO
TCP
Bus 4 (BIFO) Bus 2 (BIFO)
Bus 1 (BIFO)
Bus 3 (BIFO)
DSP-D (TMS320C6203)
XX
YY
IO
TCP
Embedded boards
Figure 4: Pentek 4292 motherboard and its daughterboard.
data transfers can be implemented with static processes. As
static processes are faster than dynamic ones, SynDEx kernels are developed without any RTOS. That is to say that the
SynDEx generic executive is not transformed into dynamic
RTOS functions, but into specific static optimized functions.
3.3. Communication media kernels
With AAA methodology, two different models are possible for communication media between processors: the SAM
(single access memory) and RAM (random access memory,
shared memory) models.
The SAM model corresponds to FIFOs in which data are
pushed by the producer if it is not full, and then pulled by
the receiver if the FIFO is not empty. Synchronizations between the two processors are hardware signals (empty and
full flags) and are not handled by SynDEx semaphores. The
data must be received in the same order as it is sent. Most of
our kernels are designed according to this model. SDBs, CPs,
and BIFO DMAs enable parallelism between calculation and
communications, whereas TCP and BIFO do not enable it
(data polling mechanism).
The RAM model corresponds to an indexed shared memory. A memory space is allocated, and an interprocessor synchronization semaphore is created for each item of data that
has to be transferred. This mechanism allows the destination
processor to read data in an order different from the one in which it has been
written by the source processor. Interprocessor synchronizations are handled by SynDEx. The first implementation of
the RAM model, through the PCI bus, is described in the following section.
A PCI transfer kernel, for communications between a
DSP on Sundance platforms and the host computer, is first
developed with the SAM model. First, the host and DSP must
be synchronized. Each data transfer therefore encloses two
synchronizations because the PCI bus does not have hardware signals like a usual FIFO (full or empty flag). The receiver must first wait for the sender to write new data in the
PCI memory. Then, the receiver can read data from the PCI
memory and send an acknowledgement back to the sender.
This “rendez-vous as soon as possible mechanism” induces
idle or wait states, but is mandatory to ensure the medium
is ready for the next transfer and to guarantee transfer order.
PCI communications using the SAM model reach a maximum transfer rate of 16 MB/s. This mechanism drastically
slows down PCI transfers. In addition, a shared buffer is actually allocated to the PC’s RAM by the PCI bus driver. Therefore, a new PCI kernel implementing the RAM model has
been developed, and the transfer rate has been improved (up
to 40 MB/s). Each item of data that has to be transferred
has its own address allocation in the PCI memory and corresponding semaphores, which allows several buffers to be
written before the first one is read. This results in less wait
states and more time for computation. The PCI scheduler
is controlled by interrupt when using this model. Consequently, communications and computations can be concurrent on the DSP, thus reducing overall execution time.
3.4.
Hardware component kernel
Moreover, an FPGA kernel for programmable hardware
components has been developed in HDL (hardware description language) and could be considered as a coprocessor in
order to speed up a specific function of the algorithm. This
kernel handles automatic integration of intercomponent
communication syntheses and instantiates a specific IP (intellectual properties) block.
M. Raulet et al.
7
Code generation
Generic
SynDEX.m4x
Architecturedependent
Processor typedependent
C62x.m4x
C64x.m4x
Pentium.m4x
FPGA.m4x
Application-dependent
Application name.m4x
Media-type-dependent
SDB.m4x (C62x, C64x, FPGA)
CP.m4x (C62x, C64x, FPGA)
Bus-PCI-SAM.m4x (C62x, C64x, Pentium)
Bus-PCI-RAM.m4x (C62x, C64x, Pentium)
TCP.m4x (Pentium, C62x)
BIFO.m4x (C62x, C64x, FPGA)
BIFO-DMA.m4x (C62x, C64x, FPGA)
Figure 5: SynDEx kernel organization.
Programming of a communication link depends on its
type, but also on the processor. Previous works have already validated these libraries [18], however, they need to
evolve with processors or communication links (depending
on provider’s additional logic).
3.5. Kernel organization
Table 1: Legenda of UMTS FDD transmitter.
SRC
CRC
SEG
COD
EQU
INT1
INT2
SPRdata
SPRctrl
SUM
CST-SCR-code
SCR
DPCCH
PSH
Source (pseudorandom generator)
Add of cyclic redundancy check bits
Segmentation
Channel coding
Equalization
First interleaving
Second interleaving
Spreading of information bits
Spreading of control bits
Creation of a complex signal
Generation of the scrambling code
Scrambling
Generation of control bits
Pulse shaping
The libraries are classified to make developments easier and
to limit modifications when necessary. As shown in Figure 5,
these files are organized in a hierarchical way. An application-dependent library contains macros for the application, such
as the calls of the algorithm’s different functions. A generic library contains macros used regardless of the architecture target (basic macros). The others are architecture-dependent:
processor or communication type-dependent. Processor-dependent libraries contain macros related to the real-time
kernel, such as memory allocations, interrupt handling, or
the calculation sequence. Communication type-dependent
libraries contain macros related to communications: send,
receive, and synchronization macros, communication sequences. As different processor types (with different programming of the link) can be connected by the same communication type, one part per processor type can be found
in one library. The right part of the file is used during the
macroprocessing.
Kernels have been developed for every component of the
platforms described in Section 3.1. When SynDEx is used for
a new application, only the application-dependent library
needs to be modified by the user. Architecture-dependent
libraries are added or modified when a new architecture is
used (a processor or a medium that does not have its kernel).
application and signal processing layers are very demanding. This partially explains the delay in the effective arrival
of UMTS on the market. It presents a very interesting case
study for high efficiency multiprocessing heterogeneous implementations. This becomes even more relevant in a software radio [19] context, which aims to implement as much
radio processing as possible in the digital domain, and especially onto processors and reconfigurable hardware. The
advantages firstly consist of easing the system design, while
privileging fast software instead of heavy low-level hardware
development. Secondly, the system supports new services
and features thanks to software adaptation capability during
system operation [20].
4.
4.1.
UMTS APPLICATION
UMTS is much more challenging than previous 2G systems, such as GSM. In particular, UMTS signals have a
3.84 MHz bandwidth compared with 270 kHz for GSM. Both
General description
UMTS FDD physical layer algorithms explained in [5] are
implemented for baseband from cyclic redundancy check
(CRC) to pulse shaping (PSH) (Table 1) for the transmitter
8
Frame/frame
CST SCR code
Transport block
SRC
CRC
SEG
COD
EQU
INT1
INT2
SPR data
SCR
SUM
DPCCH
SPR ctrl
PSH
Slot/slot
Figure 6: UMTS FDD transmitter (Tx).
Frame/frame
CST SCR code
Transport block
BER
DCRC
Slot/slot
DSEG
DCOD
DEQU
DINT1
DINT2
DSPR data
DSCR
RAKE
MFL
Figure 7: UMTS FDD receiver (Rx).
as shown in the DFG in Figure 6. This does not represent
a total real UMTS since synchronization is artificial and no
propagation channel is used (the link is completely digital).
Data may be generated by an arbitrary source (SRC Figure 6:
not in the standard) for bit-error-rate verifications or extracted from a real application, such as a video stream, to
make demonstrations.
Link characteristics in the measured version are as follows:
(i)
(ii)
(iii)
(iv)
(v)
1 transport channel,
1 physical channel,
no channel coding,
spreading factor of 4,
data rate of 950 kbps.
The receiver [5] extracts the information necessary for
the application using the scheme represented in Figure 7
(Table 2).
The number of operations effectively in use is much
greater than the figures shown, as most of them are duplicated several times. The generation of a 10 ms frame (composed of 15 slots) requires the instantiation of approximately
140 operations for Tx and 240 for Rx in this version, which is
a minimum. The granularity of the operations has the same
level of complexity as a FFT, FIR, or a memory reorganization.
4.2. FIR implementation
The filter operation is of particular interest because its implementation complexity makes it very resource consuming. This is a FIR (finite impulse response) filter with a root-raised cosine impulse response specified by the UMTS standard at both transmitter baseband output and receiver baseband input. Here, the impulse response is symmetric around
its center; this characteristic can be exploited to minimize
the number of memory accesses, the required memory for
storing the filter coefficients and the number of multiplication operations. In order to obtain a convenient rejection of
contiguous bands, the filter impulse response is spread over
16 chips and consequently has 33 taps with an oversampling
of 2. The same coefficients are used for Tx and Rx.
Equation (1) gives us the representation of a FIR filter
with an odd number of coefficients, where h is the real coefficient vector of the filter impulse response (filter taps), K is
the number of coefficients (or taps), and x[n] and y[n] are the nth input
and output complex data samples, respectively.
$$y[n] = h\left[\frac{K-1}{2}\right]\cdot x\left[n-\frac{K-1}{2}\right] + \sum_{k=0}^{(K-1)/2-1} h[k]\cdot\bigl(x[n-k] + x[n-K+1+k]\bigr). \tag{1}$$
A real filter (i.e., filter whose coefficients are real) applied
to complex data is very frequent in baseband (BB) processing and consists of applying the same filter independently
to the real and imaginary parts of the data samples. In our
case we are interested in fixed point implementations, so care
must be taken to avoid overflow while preserving signal quality (in terms of SNR). The filter at Tx is called pulse shaping (PSH), and at Rx matched filtering (MFL). At Tx, the PSH
and oversampling (which consists of inserting zeros between binary digits) operations can be combined in order to minimize computation. In this case we obtain the following: if n
is even,
$$y[n] = \sum_{k=0}^{(K-1)/4} h[2k]\cdot\left(x[n-k] + x\left[\frac{n-(K-1)}{2}+k\right]\right), \tag{2}$$
M. Raulet et al.
9
Table 2: Legenda of UMTS FDD receiver.
MFL
h(0, . . . , K)
Matched filter
Simplified RAKE (one perfectly
synchronized finger)
RAKE
CST-SCR-code
DSCR
DSPRdata
DINT2
DINT1
DEQU
DCOD
DSEG
DCRC
BER
x(−K + 1, . . . , −1, 0, . . . , N − 1)
Generation of the scrambling code
Descrambling
Despreading of information bits
Deinterleaving 2
Deinterleaving 1
Equalization inverse operation
Channel decoding
Transport block extraction
Analysis of cyclic redundancy check bits
Bit error rate
State update
Figure 8: Data management for DSP implementation of an FIR.
Table 3: Timing of PSH (input: 2560 samples).
Target
if n is odd,
$$y[n] = h\left[\frac{K-1}{2}\right]\cdot x\left[n-\frac{K-1}{2}\right] + \sum_{k=1}^{(K-1)/4-1} h[2k]\cdot\left(x[n-k] + x\left[n-\frac{K-1}{2}+k\right]\right). \tag{3}$$
y(0, . . . , N − 1)
State New data
Time/slot (microseconds)
FIR
(K taps)
C62x
300 Mhz
576
C64x
400 Mhz
320
XC2Vx
100 Mhz
338
Table 4: Timing of MFL (input: 5120 samples).
Target
Time/slot (microseconds)
The nature of a FIR operation is particularly suited to
FPGA implementations, but can also be implemented on
DSP processors. A specific characteristic of the DSP is that
it has a MAC (multiply accumulate) or a VLIW structure to support filtering computing in one clock cycle. The
TMS320C6x family, based on VLIW architecture, has six
adders and two multipliers, which operate in parallel and
complete execution in one clock cycle. A fixed point multiply
accumulate takes two instructions: multiply on one cycle and
accumulate on the next. Thanks to pipelining, it is possible to
effectively compute two multiply accumulates per cycle.
The performance then directly depends on filter length
and processor clock frequency as each tap is processed sequentially. In an FPGA, it is possible to parallelize part or
all of these operations, depending on the available gate surface. The FIR implemented in the FPGA is a distributed arithmetic (DA) filter [21]. This FIR uses no multipliers, only read-only memory (ROM) and accumulators.
The complexity of this filter only depends on the number of
bits per sample, not on the number of taps.
In the particular case of C6x, it is possible to use a data
buffer organization of the FIR as shown in Figure 8. FIR is
a typical case where functional units in the microprocessor
datapath can speed up processing. Data is processed in
blocks. The interface consists of an input data buffer, the coefficient buffer, and an output data buffer.
The algorithm for each input sample performs the function of y[n] in a for-loop. At the end of each block processing operation, the filter state is updated by copying the last K
input data into a state buffer (Figure 8). For the sake of processing efficiency, it is assumed that the input data buffer is
stored in a memory after the state data buffer so that negative
C62x
300 Mhz
1130
C64x
400 Mhz
640
XC2Vx
100 Mhz
338
Table 5: Tx timings and PSH ratio.
Target
Sundance
Configuration
Time/frame
PSH ratio
1∗ C64x
1∗ C62x
9.5 ms
50%
11.8 ms
73%
Pentek
1∗ XC2Vx
2∗ C62x
1∗ C62x
8.5 ms
9.6 ms
53%
52%
indices of the input data buffer point to the state buffer data.
In Tables 3 and 4, the differences in timing between C62x
and C64x (without taking clock rates into account) are due
to the fact that compilers are not the same for each processor, and that those DSPs have different internal architectures. In an FPGA (XC2Vx), this FIR operation could be
more parallelized giving better acceleration to the detriment
of the gate surface. However, these time values are sufficient
to get a Tx or Rx real-time application, that is why we use
the same FIR implementation for PSH and MFL. An elementary oversampling function just has to be added before
PSH. Contrary to FPGAs, we take advantage of the
FIR features (cf. Section 4.2) on DSPs to optimize and divide by 2 the computation complexity of PSH at Tx, so that
576 microseconds versus 1130 microseconds are obtained on
C62x, and 320 microseconds versus 640 microseconds on
C64x.
4.3.
Tx and Rx implementations
Four different implementations (Table 5) of a UMTS transmitter have been automatically tested using SynDEx: three
are implemented on Pentek platform and one on Sundance
platform. A transmitter application must last under 10 ms to
be real time.
10
Table 6: Rx timings and MFL ratio.
Target
Sundance
∗
Pentek
∗
Configuration
1 C64x
1 C62x
Time/frame
MFL ratio
15.9 ms
60%
20.2 ms
84%
1∗ XC2Vx
1∗ C62x
9.9 ms
32%
Principally, due to PSH (Table 5, timing PSH ratio compared to a Tx implementation), the first transmitter implementation onto the Pentek platform did not reach real time
with one C62x DSP, however, it is possible to parallelize PSH
in order to process half of the samples on two processors. Before filtering, two buffers of 1296 samples (as described in
Figure 8) must be created. Each block processing operation
overlaps 16 transient samples. The length of this PSH is reduced by 1.5 when transfers are taken into account.
Furthermore, code generation and kernels can be used
to quickly shift to another platform. UMTS prototyping on
the Sundance platform indeed required only a few hours to reach
a real-time transmitter application, thanks to our previous
works (UMTS algorithm description, SynDEx code generation and kernels) on Pentek platform. This is a tremendous
proof of the portability capabilities offered by the methodology.
UMTS Rx has been implemented according to three different configurations (Table 6). A real-time application has
been achieved on the Pentek platform with one DSP and
one FPGA. MFL parallelization is also possible on several
DSPs on Pentek platform, however, more than two DSPs are
added compared with one FPGA in the previous configuration. A configuration with 4 DSPs requires many transfers in
the Pentek ring structure, thus not reducing MFL computation length by too much.
5.
MPEG-4 OVER UMTS: A MULTILAYER SYSTEM
MPEG-4 is the latest compression standard. An MPEG-4
codec can be divided into ten main parts (e.g., system, visual, and audio) with different timing requirements and execution behaviors. Each part is divided into profiles and levels
for the use of the tools defined in the standard. Each profile
(at a given level) constitutes a subset of the standard so that
MPEG-4 can be seen as a toolbox where system manufacturers and content creators have to select one or more profiles
and levels for a given application. The application handled
here is an MPEG-4 part 2 codec developed in our laboratory,
which is based on the Xvid7 codec. This MPEG-4 codec has
also been tested on several distributed platform configurations [7] (multi-DSP implementation). Here, our aim is to
interface UMTS with MPEG-4 to provide a bitstream to the
UMTS application.
The methodology makes it possible to merge the design of very
different (heterogeneous) parts of the system in terms of
hardware processing support (PC, DSP, FPGA) as well as
7
www.xvid.org
processing nature. A conventional methodology would require different environments, which is a cause of bugs and incompatibility at the integration step. This causes delays in the
best case, and could even completely question the design in
the worst case. Our approach makes it possible to gather the different
parts of the design very early in the design flow and anticipate
integration issues. Nevertheless, MPEG-4 over UMTS raises
a new difficulty: the complete application is a multilayer system (two layers MPEG-4 and UMTS) with different data periodicities between layers. A consequence is that the whole
application cannot be represented by a single DFG. The solution consists of breaking up the UMTS physical layer and
the video codec layer into four algorithm subgraphs. Then
these subgraphs (coder, decoder, modulation, and demodulation) have been implemented onto several processors connected to each other with media (FIFO) following the topology
of Figure 9.
The MPEG-4 codec is not embedded here: firstly, TCP
throughputs on the Pentek platform do not enable uncoded
or uncompressed data to be transferred, and secondly, too
few Sundance TIMs are available in our laboratory to embed a complete application with UMTS + MPEG-4. Our real-time MPEG-4 codec provides the maximum data rate supported by our UMTS transceiver (950 kbps). An MPEG-4
bitstream, coded on a PC, is sent via a UMTS telecommunication link to another PC to be decoded. Once the communication transceiver has been implemented on a platform, it
can be viewed as a communication medium equivalent to a
FIFO.
So the platform integrating the MPEG-4 codec could be
described as two PCs interconnected by a UMTS communication medium. A FIFO is used to connect asynchronous
applications (codec to UMTS communication link). Asynchronous means different periodicities and different data exchange formats. A codec cycle corresponds to one image processing operation producing a variable compressed bitstream
in a variable time (about 40 ms). A UMTS cycle executes
one fixed-size frame in 10 ms. FIFO hardware signals (empty
and full flags) ensure the self-regulation of the global system
(UMTS + MPEG-4). Two implementations of this global system have been rapidly done onto two platforms thanks to
developed kernels as described in Figure 9. The global system runs in real time on Pentek platform and is not far from
real time on the Sundance platform (Rx takes 16 ms but must
take 10 ms). The first implementation of the global application
on the Pentek platform took quite a long time (two months) in order to
find and solve the multilayer issue, but this implementation
was then transposed almost instantaneously to the Sundance platform, which
exactly illustrates the efficiency and the pertinence of the approach.
6.
CONCLUSIONS AND OPEN ISSUES
The design process proposed in this paper covers every step,
from simulation to integration in digital signal application
development. Compared with a manual approach, the use of
our fast prototyping process ensures easy reuse, reduced time
to market, design security, flexibility, virtual prototyping, efficiency, and portability.
M. Raulet et al.
11
MPEG-4
coder
UMTS
modulation
FIFO
PC
TCP
PCI bus
FPGAs
+
DSPs
Sundance
Pentek
UMTS
demodulation
FIFO
SDB
BIFO
FIFO
FPGAs
+
DSPs
TCP
Sundance
PCI bus
Pentek
MPEG-4
decoder
PC
Figure 9: MPEG-4 over UMTS.
On the one hand, we have shown how SynDEx is capable
of manually or automatically exploring several implementation solutions using optimization heuristics, and on the other
hand, how it automatically generates dedicated distributed
real-time executives from kernels dependent on the processors and the media. These executives are dedicated to the application because they do not use any support of RTOS, and
are generated from the results of the adequation taking operation and data transfer distribution and scheduling into account while providing synchronizations between operations
and data transfers and between consecutive repetitions of the
DFG. The kernels enable recent multiprocessor platforms to
be used and also enable the process to be extended to heterogeneous platforms. It was tested on several different architectures composed of TI TMS320C6201, TMS320C6203,
TMS320C6416 DSPs, Xilinx Virtex-E and Virtex-II FPGAs,
and PCs.
The calculations and data transfers are executed in parallel. RAM and SAM communication models have been tested
for PCI transfers. Higher transfer rates are reached using the
RAM model enabling real-time video transfers between a PC
and a DSP platform.
Several complex tasks are performed automatically, such
as distribution/scheduling, code generation of data transfers
and synchronizations. So the development of a new application is limited to the algorithm description and to the adaptation of kernels for platforms or components. Furthermore,
as the C language is used and there is a large number of tested
topologies, developed DSP kernels can easily be adapted to
any other DSP and communication media.
The current MPEG-4 + UMTS application is still in
progress to achieve wireless communication. A new version
already integrates the channel coding (Turbo code) steps,
which only slightly increases the overall complexity.
This approach ensures fast prototyping of digital signal applications over heterogeneous parallel architectures in
many technological fields. Other applications have already
taken advantage of it. A SynDEx description of a MC-CDMA
(probably planned as 4G) application has been developed
by the IETR SPR laboratory [22]. LAR codec is a video
codec studied in the IETR Image Group Laboratory. A similar scheme (Figure 9) has already been tested on different
configurations: LAR over MC-CDMA, MPEG-4 over MC-CDMA, and LAR over UMTS.
The complex MPEG-4 + UMTS application stresses that
a multilayer system presents some specific characteristics in
terms of data flow. In the future, this case study may be capitalized on by creating in SynDEx new hierarchical models of
architecture graphs, in such a way that the physical layer (telecommunication link) may appear as a particular medium. Another issue is the memory allocation in SynDEx. At each output of each vertex, SynDEx creates an allocation. At this time,
memory allocations are reordered and reused manually to
give an optimal solution. Current works deal with an automatic solution, based on graph coloring techniques and life
memory allocation.
REFERENCES
[1] A. M. Eltawil, E. Grayver, H. Zou, J. F. Frigon, G. Poberezhskiy, and B. Daneshrad, “Dual antenna UMTS mobile station transceiver ASIC for 2 Mb/s data rate,” in Proceedings of
IEEE International Solid-State Circuits Conference (ISSCC ’03),
vol. 1, pp. 146–484, San Francisco, Calif, USA, February 2003.
[2] K. Keutzer, S. Malik, A. R. Newton, J. M. Rabaey, and A.
Sangiovanni-Vincentelli, “System-level design: orthogonalization of concerns and platform-based design,” IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems, vol. 19, no. 12, pp. 1523–1543, 2000.
[3] T. A. Henzinger, C. M. Kirsch, M. A. A. Sanvido, and W. Pree,
“From control models to real-time code using Giotto,” IEEE
Control Systems Magazine, vol. 23, no. 1, pp. 50–64, 2003.
[4] S. S. Bhattacharyya, P. K. Murthy, and E. A. Lee, Software Synthesis from Dataflow Graphs, Kluwer Academic, Norwell, Mass,
USA, 1996.
[5] 3GPP TS 25.213 v3.3.0: Spreading and Modulation FDD, release 1999.
[6] F. Pereira and T. Ebrahimi, The MPEG-4 Book, Prentice-Hall
PTR, Upper Saddle River, NJ, USA, 2002.
[7] N. Ventroux, J. F. Nezan, M. Raulet, and O. Déforges, “Rapid
prototyping for an optimized MPEG-4 decoder implementation over a parallel heterogenous architecture,” in Proceedings of 28th IEEE International Conference on Acoustics, Speech,
and Signal Processing (ICASSP ’03), vol. 2, pp. 433–436, Hong
Kong, China, April 2003, Conference cancelled - Invited paper,
ICME 2003.
[8] Y. Sorel, “Massively parallel computing systems with real
time constraints: the “Algorithm Architecture Adequation”
methodology,” in Proceedings of 1st IEEE International Conference on Massively Parallel Computing Systems (MPCS ’94), pp.
44–53, Ischia, Italy, May 1994.
[9] T. Grandpierre, C. Lavarenne, and Y. Sorel, “Optimized rapid
prototyping for real-time embedded heterogeneous multiprocessors,” in Proceedings of 7th International Workshop on Hardware/Software Codesign (CODES ’99), pp. 74–78, Rome, Italy,
May 1999.
12
[10] Y. Sorel, “Real-time embedded image processing applications
using the A3 methodology,” in Proceedings of IEEE International Conference on Image Processing (ICIP ’96), vol. 2, pp.
145–148, Lausanne, Switzerland, September 1996.
[11] T. Grandpierre and Y. Sorel, “From algorithm and architecture specifications to automatic generation of distributed real-time executives: a seamless flow of graphs transformations,” in
Proceedings of 1st ACM and IEEE International Conference on
Formal Methods and Models for Co-Design (MEMOCODE ’03),
pp. 123–132, Mont Saint-Michel, France, June 2003.
[12] F. Balarin, L. Lavagno, P. Murthy, and A. Sangiovanni-Vincentelli, “Scheduling for embedded real-time systems,”
IEEE Design and Test of Computers, vol. 15, no. 1, pp. 71–82,
1998.
[13] L. A. Hall, D. B. Shmoys, and J. Wein, “Scheduling to minimize
average completion time: off-line and on-line algorithms,” in
Proceedings of 7th Annual ACM-SIAM Symposium on Discrete
Algorithms (SODA ’96), pp. 142–151, Atlanta, Ga, USA, January 1996.
[14] V. Fresse, O. Déforges, and J. F. Nezan, “AVSynDEx: a rapid
prototyping process dedicated to the implementation of digital image processing applications on Multi-DSP and FPGA
architectures,” EURASIP Journal on Applied Signal Processing,
vol. 2002, no. 9, pp. 990–1002, 2002, Special Issue on implementation of DSP and communication systems.
[15] Texas Instruments, “TMS320C6000 Optimizing Compiler
User’s Guide,” reference spru187l, March 2004.
[16] Y. Le Méner, M. Raulet, J. F. Nezan, A. Kountouris, and C.
Moy, “SynDEx executive kernel development for DSP TI C6x
applied to real-time and embedded multiprocessors architectures,” in Proceedings of 11th European Signal Processing Conference (EUSIPCO ’02), Toulouse, France, September 2002.
[17] Texas Instruments, “TMS320 DSP/BIOS User’s Guide,” reference spru423b, September 2002.
[18] F. Nouvel, S. Le Nours, and I. Herman, “AAA methodology
and SynDEx tool capabilities for designing on heterogeneous
architecture,” in Proceedings of 18th Conference on Design of
Circuits and Integrated Systems (DCIS ’03), Ciudad Real, Spain,
November 2003.
[19] A. Kountouris, C. Moy, and L. Rambaud, “Reconfigurability:
a key property in software radio systems,” in Proceedings of 1st
Karlsruhe Workshop on Software Radios, Karlsruhe, Germany,
March 2000.
[20] C. Moy, A. Kountouris, and A. Bisiaux, “HW and SW architectures for over-the-air dynamic reconfiguration by software
download,” in Proceedings of Software Defined Radio workshop
of IEEE Radio and Wireless Conference (RAWCON ’03), Boston,
Mass, USA, August 2003.
[21] S. A. White, “Applications of distributed arithmetic to digital signal processing: a tutorial review,” IEEE ASSP Magazine,
vol. 6, no. 3, pp. 4–19, 1989.
[22] S. Le Nours, F. Nouvel, and J. F. Helard, “Example of a Co-Design approach for a MC-CDMA transmission system implementation,” in Journées Francophones sur l’Adéquation Algorithme Architecture (JFAAA ’02), Monastir, Tunisie, December 2002.
M. Raulet received his postgraduate certificate in signal, telecommunications, images,
and radar sciences from Rennes University
in 2002, and his Engineering degree in electronic and computer engineering from National Institute of Applied Sciences (INSA),
Rennes Scientific and Technical University
in 2002, where he is currently working as
a Ph.D. student in collaboration with Mitsubishi Electric. His interests include image
compression and telecommunication algorithms and rapid prototyping.
F. Urban received his postgraduate certificate in signal, telecommunications, images,
and radar sciences from Rennes University in 2004, and his Engineering degree in
electronic and computer engineering from
INSA, Rennes Scientific and Technical University in 2004. He is currently working as a
Ph.D. student in collaboration with Thomson R&D, France. His interests include image compression, rapid prototyping, DSP
optimization, and codesign.
J.-F. Nezan is an Assistant Professor at National Institute of Applied Sciences (INSA)
of Rennes and a Member of the IETR Laboratory in Rennes. He received his postgraduate certificate in signal, telecommunications, images, and radar sciences from
Rennes University in 1999, and his Engineering degree in electronic and computer
engineering from INSA, Rennes Scientific
and Technical University in 1999. He received his Ph.D. degree in electronics in 2002 from the INSA. His
main research interests include image compression algorithms and
multi-DSP rapid prototyping.
C. Moy has been an Engineer at the National Institute of Applied Sciences (INSA),
Rennes Scientific and Technical University,
France, 1995. He received his M.S. and
Ph.D. degrees in electronics in 1995 and
1999 from the INSA. He worked from 1995
to 1999 on spread-spectrum and RAKE receivers for the Institute on Electronics and
Telecommunications of Rennes (IETR). He
then worked 6 years at Mitsubishi Electric
ITE-TCL Research Laboratory where he was focusing on software
radio systems and concepts. He is now an Assistant Professor at
Supélec. His research is done in the SCEE Laboratory of the UMR
CNRS 6164 IETR, which focuses on cognitive radio. He addresses
heterogeneous design techniques for SDR as well as cross-layer optimization topics.
M. Raulet et al.
O. Deforges is a Professor at National
Institute of Applied Sciences of Rennes
(INSA). He graduated in electronic engineering from the École Polytechnique, University of Nantes, France, in 1992, where he
also received a Ph.D. degree in image processing in 1995. In 1996, he joined the Department of Electronic Engineering at the
INSA, Rennes Scientific and Technical University. He is a Member of the UMR CNRS
6164 IETR Laboratory in Rennes. His principal research interests
are image and video lossy and lossless compressions, image understanding, fast prototyping, and parallel architectures.
Y. Sorel is a Research Director at INRIA
(National Institute for Research in Computer Science and Control) and a Scientific Leader of the Rocquencourt’s Team
AOSTE (Analysis and Optimization for Systems with Real-Time and Embedding Constraints). His main research topics are modeling of distributed real-time systems with
graphs and partial order, uniprocessor and
multiprocessor real-time scheduling optimizations of systems with multiple constraints, and automatic code
generation for hardware/software codesign. He is also the Founder
of SynDEx, a system-level CAD software distributed free of charge
at www.syndex.org.
13
+0
@;
# "
0
5 $
0 (1#/#"("#
/ +<
+" 1
! ( 3# #)
3
+0
+0
'-
504
$
"#0 -
-
,(
"
&
&
,
> 6
K
92
.$
,$
# #$
03
& &%
: 7!
2
1$ $
!
1 "
'-
" "
'- "
$!% <
6
9
6
'9
>
9
69
6
9
(
6
A9
(
,9
>
,$
'0# "
, ' 9
c d6
9
* 6
,$
'0#
H cBd ,
9
K
9
( &
+
9
6
*
9
>
9
<
G
<
> >
'
>
>
>6
>M
>( 6 <
>
! ,<
$ 9
(
K
+
%
/
"
#
c ( d,
>M
>
9
9>
9
A 69
> %
.
)
H<
G
>M
3
>M
6
2
)
@
6
(K
2
9
$
5 '
> %
+ K )
<
<
9
6
A
9
>M
>
WiFi
Randomiser
WiMAX
Convolutional
Coder
Interleaver
UMTS
Constellation
Mapper
LUT
Flip Flop
<
Scrambler
Encoder
Real
Spreader
Butterfly
Adder
NAND
) .!
RS
FFT-N
NOT
-M
>
&
&
%
%
9
Over
Sampler
Multiplier
XOR
#
Spreader
AND
! .&
9
OR
$!% <
&
&
6
+
\ ,9
2
2
"
\<
, '< 0 '#0.
\
@@
'-
504
$
"#0 -
\
92
<
>
/
:
) .!
'
<
6M
>
6
< 9
9
9
&
0$ %
1$ $
!
"
"
92
,
&
%
& ;& "
<
>
E
c (=d
))
9
/ ! <
&
\
2
9
01 $
!
V'
$
K
(
9
96
><
'
+
2
<
'- "
R
6
&
+
'-
9
<
9
1 "
c ( d
! <. &
9
J
.$
,$
# #$
03
2
6
9
( " '
( &
I
> 2
>
9
<
9
6
<
69
9
9
9
+
6
<
>
69
'
&
@:
9
6
, !-0 Z 4 ,
96
2
"
, '< 0 '#0.
2
2
6
2
'-
$
"#0 -
504
9
9
$ /0 Q
G
/
/
0/15
"
# ! ,<
/
6
_<
1
"1
9," E$
1
K
+
9
+
+"
,0
#<
!/%/ )
,9
_
/
"
3
5
+
2
K
9
+ K
6
5 - "5
,$
03 ,
9
9 "$
\
,
,
'>
9
"<
'
A
96
!
&
6
A
X P #>
]$
O
!
A
# ! ,
2
6
,$
'0#< 6
,
B $
(
4
'
, !-0 Z 4 ,
0
0
#
'- "
c d'
>
,9
'-
A
3
"
*
1 "
2
9
>6
9
'
"
# ! ,< * 6
,$
'0#
E$
301
3# c (;d
9
2
1$ $
!
H 6
3#
9
.$
,$
# #$
03
+
+
E
C
01 $
!
V
>
^
"
, '< 0 '#0.
@B
'-
@
$
"#0 -
504
-
"
, '< 0 '#0.
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
WIRELESS COMMUNICATIONS AND MOBILE COMPUTING
Wirel. Commun. Mob. Comput. 2006; 6:1–14
Published online in Wiley InterScience
(www.interscience.wiley.com) DOI: 10.1002/wcm.487
2
3
Install or invoke?: The optimal trade-off between
performance and cost in the design of multi-standard
reconfigurable radios
4
6
Virgilio Rodriguez†,‡ , Christophe Moy and Jacques Palicot∗
SCEE Research Team, IETR, Supélec, Cesson-Sevigné, France
7
Summary
5
8
9
10
11
12
13
14
15
16
17
We approach the design of multi-standard reconfigurable radio as a choice between two extremes: (a) installing
complex, self-contained dedicated components that provide high performance at a high cost (‘Velcro approach’),
versus (b) supporting all targeted standards through primitive lower level components, which reduces cost but
maximises latency. To find the optimal trade-off between performance and cost, we build a mathematical model, in
which we represent the design choices through a graph of progressively simpler functional modules. We illustrate
our approach through a simple but realistic design example. Additionally, we show that this optimisation can
be reformulated as a version of the well-known network-design problem, which opens the door to a wealth
of results, algorithms and experience, already available in the scientific literature. We identify some specific
algorithms that can help us find the optimal design and discuss limitations and future directions of our research
program. Copyright © 2006 John Wiley & Sons, Ltd.
18
19
20
KEY WORDS:
reconfigurability; software-defined radio; design; graph theory; network design problem;
optimisation; multi-standard; SDR; parametrisation
21
21
1.
22
We view the design of a multi-standard reconfigurable
radio as choosing the optimal point between two
extremes. At one extreme is the ‘Velcro’ solution:
to support several communication standards through
self-contained complex components, each exclusively
dedicated to a given standard. At the other extreme,
we can attempt to support all desired standards
through very simple, primitive operators (adders,
multipliers, MAC, etc.) by invoking them repeatedly,
in order to perform the various communication tasks
23
24
25
26
27
28
29
30
31
Introduction
necessitated by the supported standards. The Velcro
approach will generally provide the best performance,
but at the highest monetary cost. In a Velcro
design, much functionality is wastefully replicated.
Conversely, by going to the other extreme, we can
minimise the monetary cost of the radio, but at
a performance level that may be unacceptable for
many practical applications. Thus, we need to find
the right level of complexity at which to use self-contained dedicated components. This is the level
that gives the optimal trade-off between performance
and cost.
*Correspondence to: Jacques Palicot, Supélec, Avenue de la Boulaie, BP 81127; 3551 Cesson-Sevigné; France.
†
E-mail: [email protected]
‡
V. Rodriguez is now with the Institut für Nachrichtentechnik, at the Universität Karlsruhe, in Germany.
Copyright © 2006 John Wiley & Sons, Ltd.
32
33
34
35
36
37
38
39
40
41
42
43
2
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
V. RODRIGUEZ, C. MOY AND J. PALICOT
To find the optimal performance/cost trade-off, we
model the equipment as a hypergraph of progressively
simpler functional modules. The functionality of one
such module can be provided either by installing
a self-contained component, or by invoking
(possibly many times) simpler (lower level) modules.
Thus, for us, ‘module’ refers to a functionality,
while a self-contained component is an optimised
hardware/software combination, dedicated to provide
a functionality in the most efficient manner. One such
component could be an equaliser, for example or an
implementation of the fast Fourier transform (FFT).
But rather than installing such component, we could
achieve the given functionality by invoking lower
level modules, instead. For example some forms of
equalisation can be achieved through the FFT [1];
and the functionality of the FFT operator can be
achieved via a butterfly module, which itself could be
implemented via CORDIC [2] (and each of the butterfly
and CORDIC modules could be implemented through
an installed component, or by invoking yet simpler
modules, and so on). Simpler, lower level components
are generally less expensive to build and can be reused
by several upper level modules inside and across standards. Thus, the use of lower level components reduces
the monetary cost of the equipment. Unfortunately,
such use generally increases the execution time of the
concerned task. As a consequence, the total execution
time of a given operation may exceed practical
limitations. Thus, to achieve the optimal trade-off
between performance and cost, we need to answer
repetitively the question: should we install or invoke?,
that is should we install now, at this level of complexity,
a self-contained component, or invoke yet lower level
modules to achieve the desired functionality?
At the foundation of our study is the ‘common
operators’ approach to the design of reconfigurable
equipment. Its main principle is the identification and
(re)use throughout the design of common components
that can each match several processing contexts, via
a simple parameter adjustment. This approach can
greatly increase the efficiency of a multi-standard
software-defined radio, both in terms of its cost,
and of the speed of reconfiguration during operation.
The common operators approach is discussed more
extensively in Reference [3].
The present publication builds upon some recent
work of ours. In Reference [4], we introduce our
formal approach to the design of multi-standard
radios. To explore the cost/performance trade-off in
Reference [4], we combine economic and latency
considerations into a single cost function. The
combined cost function (a weighted sum) is a
reasonable first step, as it simplifies the exposition
and the solution procedure. But we acknowledge that
this combination has some drawbacks: it adds the
monetary cost (paid once by the designer) with the
‘delay costs’ incurred by the user during each use,
throughout the entire useful life of the radio, it fails to
account for the hard time constraints, often arising from
communication applications, and ultimately makes
the chosen design highly dependent on the weights
(which are themselves arbitrary, even if judiciously
chosen with attention to real-life considerations). To
address these concerns, in Reference [5], we minimise
the (monetary) cost only, while considering latency
through appropriate execution time constraints that
cannot be exceeded. Neither of References [4,5]
provides the ideal algorithm to achieve the solution.
Both References [4,5] utilise exhaustive search (which
may be impractical for large design problems), and
Reference [4] briefly explores simulated annealing
(a general algorithm not specifically oriented to
our kind of problem [6]). In Reference [7], we
recast our problem as a ‘network-design problem’.
The network-design problem is well known and
has a very extensive literature, which offers
algorithms, computational complexity results and
many practical applications in a variety of contexts such
as telecommunication, transportation, water resources
and even social interactions [8]. Indeed, we have
already identified specific algorithms which seem
particularly well suited to our purposes, and which
we are examining [9–11]. In the present work, we
consolidate, clarify and where appropriate correct and
extend the work reported in References [4,5,7].
Neither our previous nor the present work addresses
the identification of new common operators, useful
to the design of multi-standard reconfigurable radios.
Such identification is, in its own right, an active area
of research. Researchers are proceeding along several
directions. For instance, Reference [1] shows that many
important tasks of a communication receiver can be
implemented through the FFT. In turn, the FFT can
be implemented via the butterfly operator, and, as
argued more recently [2], via CORDIC. With this in
mind, some researchers are seeking frequency-domain
implementations for different families of algorithms.
For example, Reference [12] studies the frequency-domain implementation of Reed–Solomon channel
decoding.
Other relevant works describing interesting approaches to the design of reconfigurable radios include:
References [13,14], which argue for parametrised
DOI: 10.1002/wcm
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
THE OPTIMAL DESIGN OF MULTI-STANDARD RECONFIGURABLE RADIOS
174
design from a ‘common functionality’ perspective, as
originally advocated in Reference [15]; Reference [16]
whose approach is inspired by object-oriented
programming; Reference [17] which attempts to cover
hardware and software design under a common
methodology; and Reference [18] which proposes
designs that integrate the entire system on a chip (SoC).
The rest of the paper proceeds as follows. First, we
build the mathematical model. This step includes the
drawing of a graph that represents design alternatives,
the consideration of possible performance metrics
and the specification of an objective function and
appropriate constraints. Subsequently, we discuss the
main approaches we have taken to consider cost
and latency in the same optimisation problem. We
then undertake the optimisation procedure under
two different cost functions (i) weighted sum of
monetary and ‘delay’ costs, as in Reference [4],
and (ii) monetary cost only, with ‘delay’ as a
constraint, as in Reference [5]. Subsequently, we
discuss and interpret our results. Then, we show that our
architecture optimisation enquiry can be reformulated
as a ‘network-design problem’, and comment on
specific algorithms available in the literature. We
conclude the paper, with a discussion addressing
further interpretations, as well as limitations and future
directions of our research program.
175
2.
176
2.1. Graph-Theoretic Representation
177
As illustrated by Figure 1, we represent a multi-standard radio as a hypergraph of progressively
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
178
Mathematical Model
Fig. 1. Graph corresponding to a conceivable tri-standard
reconfigurable radio.
3
simpler functional modules. Each node (module) represents a functionality that can either be implemented
via a dedicated hardware/software component (e.g.
an ASIC, or an intellectual property core), or can
be achieved by invoking lower level modules. The
hyperarcs leaving a node (parent) specify the simpler
modules (descendants) that could provide the required
functionality through multiple calls. Descendant nodes
may not all be at the same level.
An OR arc (direct arrow) means that only one of
the descendant nodes is necessary to implement the
functionality of the parent node. For example, S1 can
be implemented through any one of A1, A2 or B1. An
AND arc (inverted Y connection as pointing from S3 to
A4 and A5) means that all descendant nodes are needed
to implement the functionality of the parent node. In
some cases, a parent node may have both AND and OR
dependencies with its descendants. For example there
are three alternatives for providing the functionality of
S2: (1) installing a self-contained complex component
dedicated to S2, (2) making available both A2 and A3,
(3) making available A4.
The roots of this graph, at the highest level, represent
the standards to be supported by the radio. The fact
that the graph is acyclic helps the process of finding
the solution to our design problem.
179
2.2.
205
Optimisation Parameters
The decision to provide a functionality via a self-contained, dedicated component or by invoking
simpler modules multiple times is determined by two
key considerations: (monetary) cost and (execution)
time.
The monetary cost of a component (which is paid
only once during the useful life of the radio) represents
the total cost of including the component in the design.
In some architectures for reconfigurable radios, the
monetary cost can be represented by (is proportional
to) the number of logic units necessitated by an FPGA
implementation. It is best to take the view point of
a system integrator that ‘outsources’ the components
(software or hardware). Then, the monetary cost
represents the fair market value, at design time, of
acquiring the finished component in the open market.
Execution time (incurred every time a component
is employed throughout the life of the system) is also
a critical consideration, because the user has limited
patience, and in fact, communication standards impose
hard time constraints (deadlines) for the completion of
certain operations.
DOI: 10.1002/wcm
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
4
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
2.3. Considering Both Cost and Latency:
Two Approaches
There are at least two basic approaches to considering
cost and latency in the design of a reconfigurable
equipment: (i) the ‘combined’ cost function and (ii) the
deadline approaches.
As we have done in Reference [4], we can combine
economic and latency considerations into a single
cost function (a weighted sum of both ‘costs’). The
combined cost function has at least three serious
problems: (i) it adds the monetary cost (paid once by
the designer) with the ‘delay costs’ incurred by the
user, each time it executes a standard throughout the
useful life of the equipment, (ii) it fails to account
for the hard time constraints, often arising from
communication applications, and (iii) it makes the
design highly dependent on the weights, which are
themselves arbitrary. Nevertheless, the combined cost
function is a reasonable first step, as it simplifies
the exposition and the solution algorithm. It may
also be helpful in pre-design work, and could allow
the designer to gear a design to the ‘luxury’ (high
performance) or ‘value’ market sectors.
As an alternative, as in Reference [5], we can
minimise the economic cost only and take latency
into account through execution time constraints
(‘deadlines’) that cannot be exceeded. However,
determining the ‘deadlines’ to be observed while
designing the radio to support a given standard is non-trivial and should be done with great care. If deadlines
are set too high, they may lead to a design that fails
to perform at acceptable levels in practical situations.
But if deadlines are set lower than necessary, the
resulting design will be more expensive than necessary.
The key to determining the right delay tolerances
is to examine the ‘transmission chain’, applicable to
each supported standard (the block diagram depicting
the various steps necessary to establish end-to-end
communication under a considered standard, as shown
by Figure 2), and the numerical specifications provided
by the standardisation bodies. A chosen architecture
must yield a performance able to support end-to-end
communication under each of the supported standards.
We do not further address here the determination of
these deadlines and assume that the pertinent information has been obtained elsewhere and given to us.
274
2.4.
275
Conceptually, the ‘deadline approach’ includes as a
special case, the ‘combined cost’ approach. This is
276
Formalising the Optimisation Problem
Fig. 2. The key to determining appropriate ‘deadlines’ is an
examination of the ‘transmission chain’. This corresponds to
GSM, but our development does not specifically target that
standard.
so, because when we use the combined cost, we can
pretend that this cost is the original monetary cost, and
that we ignore the deadlines because they are ‘very
large’. Thus, we simply choose the least expensive
design. Presumably, the designs that are ‘too slow’ are
also (under the combined cost function) very expensive
and we would not choose them for that reason. Thus,
below we formalise the deadline approach only, and
think that when we use the combined cost function, all
deadlines are set to infinity.
Let:
$S_i$, with $i = 1, \ldots, I$, be the standards to be supported,
$\delta_i$ the ‘design deadline’ of standard $S_i$.
$F_k$, with $k = 1, \ldots, K$, be 0-1 variables such that
$F_k = 1$ if component $k$ is chosen to be installed in self-contained (dedicated) form (and $F_k = 0$ otherwise).
$C_k$ and $T_k$ be respectively the (monetary) cost and
the execution time of component $k$ (if self-contained).
$\tau_i(F_1, F_2, \ldots, F_K)$ be the time of executing the tasks
used to calculate the ‘deadline’ of standard $i$ for a
particular choice of components.
Notice that not all the possible designs are
acceptable. For example if there are three root nodes
(standards), and the self-contained components that
can each execute an entire standard (for the ‘Velcro’
design) are labelled 1, 2 and 3, then, the point
(1, 1, 1, 0, 0, . . . , 0, 0), which means setting F1 =
F2 = F3 = 1 and all others Fj = 0, is in principle
acceptable (but not necessarily optimal, of course!. . .
this is the ‘Velcro’ architecture). But some other
choices would not support the desired standards (e.g.
DOI: 10.1002/wcm
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
$(0, 0, \ldots, 0)$, which means building nothing at all and is
clearly unacceptable).
Let $\Omega$ denote the set of all $K$-tuples of binary
numbers that correspond to acceptable choices of the
set of components (this is information taken directly
from the hypergraph).
Then we seek to solve:
$$
\min_{(F_1, F_2, \ldots, F_K) \in \Omega} \; \sum_{k=1}^{K} F_k C_k
$$
subject to
$$
\begin{aligned}
\tau_1(F_1, F_2, \ldots, F_K) &\le \delta_1 \\
&\;\;\vdots \\
\tau_I(F_1, F_2, \ldots, F_K) &\le \delta_I
\end{aligned}
$$
316
(1)
317
3.
318
In principle, the preceding problem can be solved
directly by computing the cost of every feasible
design (choice of components that supports the desired
standards and obeys the deadlines, if applicable), and
choosing the design of least cost. For larger problems,
a ‘smarter’ algorithm is needed. Below, we provide
an illustration which can be solved with minimal
computing effort.
319
320
321
322
323
324
325
Solving the Optimisation
326
3.1. A Realistic Illustration
327
Figure 3 (representing an evolution of a figure from
Reference [4]) shows a sub-graph, corresponding to
the decomposition of several processing elements
(equalisation, multi-channel, OFDM) that could be
part of a multi-standard radio system. The sub-graph
is not intended to show all the possible alternative
implementations for each of the considered processing
elements. Root nodes (standards) are not shown.
The equalisation block compensates for the multipath impairment typical of wireless channels. It
can be either implemented through a finite-impulse
response filter (FIR) or through the FFT operator.
The implementation of equalisation in the frequency
domain is particularly attractive for channels that
exhibit long-impulse responses, which leads to FIR
filters with a very high number of taps [19]. Notice
that the inverse FFT is computationally equivalent to
the FFT; thus, we attach a multiplicative factor of 2 to
the arc pointing from the equaliser to the FFT.
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
5
Multi-channel refers to the channelisation function
of a cellular base station. This can be accomplished
via the ‘classical’ channel per channel procedure, or by
proceeding in parallel, through a filter-bank channeliser
(which can be implemented via FFT). Other lower level
modules correspond to well-known signal processing
constructs.
The graph shows that at least the FFT operator is
needed to implement OFDM modulation. The graph
also shows that both equalisation and OFDM could
employ the same FFT operator, although it is optional
for FIR-based equalisation. A reconfigurable FFT
component could provide the functionality of FFT
operators of different orders.
The numerical values, near the bottom right of
a block represent cost/time associated with the
component that could provide that functionality. The
units of measurement are immaterial for our purposes.
We only need relative figures to be able to compare
one design alternative to another. But the time figures
must be consistent with those in which the deadlines are
expressed. At the top left, there is a numerical identifier
for the corresponding module.
The arcs are tagged with a number of calls (NoC)
figure. When a node is needed several times by a
higher level module, it is called several times, and not
physically replicated. Accordingly, the multiple calls
affect the latency of the system, but not its monetary
cost.
Below, we assume that the channeliser handles
25 channels, that the FFT is of order 64, and that to
implement this FFT through a butterfly, 192 calls are
needed.
346
3.2.
379
Results and Interpretations
(1) Deadline approach: The sub-graph of Figure 3 is
small enough to be solved by exhaustive search,
although the number of design alternatives quickly
explodes. The thick broken blue line in Figure 3
shows an FFT-only design, which is a conceivable
outcome of the optimisation, for a specific set of
deadlines.
In Figure 4, we integrate the information in
Table I and Figure 3 into the widely available
spreadsheet model, which may be sufficient for
smaller designs. Figure 4 illustrates the solution
procedure and shows some interesting design
alternatives. For this illustration, we have ignored
the designs that use the simplest (lowest level)
components (adder, multiplier, etc). Thus, the
simplest considered component is the CORDIC.
DOI: 10.1002/wcm
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
6
Fig. 3. Partial hypergraph with some design parameters and a plausible solution indicated through the thick broken (blue)
line. A block represents a functionality (module). Near the bottom right of a block are cost/time values, associated
with the component that could provide the concerned functionality if installed (the number at the top left is just an
identifier).
396
397
398
399
400
401
402
403
404
405
406
407
A 1 in the column corresponding to a module
means that, in that particular design, the module is
implemented via a dedicated component. $T_1$, $T_2$
and $T_3$ are the execution times corresponding, respectively, to
OFDM, equalisation and the channeliser for a given
design. The deadlines should be applied to the
entire transmission chain (not to individual tasks).
Nevertheless, for the sub-design illustration under
consideration, we pretend that the tasks at the top of
the graph have associated deadlines (determined by
the supported communication standards). We have
considered that an order-64 FFT operator satisfies
our requirements and that the multi-channeliser
handles 25 channels.
The ‘Cost’ column shows that the least expensive design consists of a CORDIC component
only, but it performs slowly. This would be the
chosen design if the deadlines are, for example
6200, 12 300 and 320 000, respectively (recall
that no time unit has been specified so far).
If the deadlines are tighter, the CORDIC-only
design falls outside the feasibility region. Then
other candidates may be chosen. For example the
butterfly-only design performs a good deal better
DOI: 10.1002/wcm
408
409
410
411
412
413
414
415
416
417
418
419
7
Fig. 4. The figure illustrates how to integrate the design data and procedure into the widely available spreadsheet
model, which may be sufficient for smaller designs (the data displayed vertically may be easier to read in Table I).
Only certain interesting design alternatives are shown. A row represents a design. A 1 in the column corresponding to
a module means that this module is implemented via a self-contained component. T 1, T 2 and T 3 are the execution
times corresponding to, respectively, OFDM, equalisation and the multi-channel channeliser for a given design. The
‘Cost’ column shows that the least expensive design consists of a CORDIC component exclusively, but performs poorly.
The ‘Velcro’ solution (bottom) performs fastest but costs most, as it implements the top modules with self-contained
components.
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
in all three tasks, and is only marginally more
expensive. The FFT-only design costs about
70 times more than the butterfly-only alternative,
but performs seven to eight times better, and could
very well be the optimal choice under tighter
time constraints. The ‘Velcro’ solution performs
best and costs most, as it implements the top
modules with dedicated components. The FFT-only design is far behind the Velcro approach,
only with respect to T 3 (multi-channeliser). This
performance gap narrows considerably if one
complements the FFT component with a filter-bank
component. The Filter-bank/FFT combination
provides performance near Velcro’s, for about one
quarter of the cost.
(2) Combined cost function: We can use the data
of Figure 4 to build Table II and illustrate the
combined cost function approach, which we first
applied in Reference [4]. In Table II, the columns
labelled CC(w) contain for the corresponding
design the ‘combined cost’, which is a weighted
sum of the form $w \times \text{cost} + T_1 + T_2 + T_3$, where
w is the weight. A weight of zero simply means
that ‘money is no object’, and we want the best
performing design at any monetary cost. In that
case, as column CC(0) shows, the ‘Velcro’ design
is optimal. But as soon as we consider cost at
all, even with a fairly small weight of only 1
(column CC(1)), the Velcro approach becomes
non-optimal. With a weight of 13 the FFT-only
design is the winner, which is consistent with the
results presented in Reference [4]. And it takes a
much larger weight (1000) for the butterfly-only
design to become optimal. With a huge weight on
DOI: 10.1002/wcm
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
8
Table I. The table shows for each module in Figure 3, its numeric
identifier, verbal description, as well as its monetary cost and
execution time, if implemented through a self-contained component.
Money and time units are not specified.
454
455
456
457
ID
Description
Money
Time
73
72
71
62
61
51
42
41
31
23
22
21
13
12
11
4
3
2
1
OFDM
Equaliser
Multi-channeliser
Filter bank
Channel-per-channel
Multi-rate filter
FIR
Down sampler
FFT
Butterfly
Down converter
CIC filter
MAC
CORDIC
CIC cell
Look-up table (LUT)
Adder
Multiplier
Delay
1200
1500
10 000
2000
1500
700
500
1
1000
15
30
300
12
11
10
1
4
10
1
300
200
10
100
200
700
1000
1
500
20
10
50
8
8
2
1
2
5
1
monetary cost (50 000), which essentially means
we seek the least expensive design without concern
for performance, the CORDIC-only design is
chosen.
459
4. Toward an Efficient Algorithm: The
Network Theory Reformulation
460
4.1.
461
Above, we have described a formulation that allows
us to find for a multi-standard reconfigurable radio,
an architecture which is optimal in view of both cost
(money) and performance (computational execution
time). We can find the exact optimum through
exhaustive search (‘brute force’). In Reference [4], we
also explored finding an approximate solution through
simulated annealing. The ‘brute force’ approach is
458
462
463
464
465
466
467
468
Motivation
computationally impractical when there is a very large
number of design alternatives. The second approach
utilises a ‘generic’ algorithm not specifically oriented
to the problem at hand, and may or may not yield a
solution near the ‘true’ optimum. A third alternative is
to seek a ‘smart’ algorithm which fully utilises the
special structure of the graph to efficiently search the
solution space. In the present section, we move in
that direction, by recasting our problem as a single-source (or single-terminal (sink)) ‘network-design
problem’.
The network-design problem is well known and
has a very extensive literature, which offers
algorithms, computational complexity results and
many practical applications in a variety of contexts such
as telecommunication, transportation, water resources
and even social interactions [8]. Below, we show how to
perform the problem reformulation and give a specific
example grounded on a practical design. In support of
our approach, we identify and discuss several specific
algorithms which seem particularly well suited to our
purposes [9–11].
469
4.2.
491
‘Network-Design’ Problem
Figure 5(a) provides a simple illustration of the single-source network-design problem. Suppose there is an
initial point called the origin, o, and three ‘terminals’
(destinations), t1 , t2 and t3 . We wish to connect o to
each ti in a way that satisfies certain optimality criteria.
There may also be some intermediate nodes, which
themselves may be (partially) interconnected. There
are many conceivable solutions. The most obvious one
is simply to build three direct ‘roads’, one from o
to t1 , a different one from o to t2 and a third one
from o to t3 . This solution is certainly plausible and
probably provides the shortest ‘travel times’, but it is
unlikely to be ideal. By building dedicated roads, we
are possibly missing the savings resulting from having
‘common segments’ which may be used by all traffic,
Table II. For each design, the combined cost (weighted sum) is obtained as CC(w) = w × cost + T 1 + T 2 + T 3 where w is the weight.
Design
CORDIC
CORDIC+DSamp
Butterfly
FIR+CORDIC
FIR+butterfly
FFT
FilterBk+FFT
‘Velcro’
Cost
T1
T2
T3
CC(0)
CC(1)
CC(13)
11
12
15
511
515
1000
3000
12 700
6144
6144
3840
6144
3840
500
500
300
12 288
12 288
7680
1000
1000
1000
1000
200
313 344
307 925
195 840
31 144
28 840
25 500
100
10
331 776
326 357
207 360
38 288
33 680
27 000
1600
510
331 787
326 369
207 375
38 799
34 195
28 000
4600
13 210
331 919
326 513
207 555
44 931
40 375
40 000
40 600
165 610
CC(1000)
342 776
338 357
222 360
549 288
548 680
1027 000
3001 600
12700 510
CC(50 000)
881 776
926 357
957 360
25588 288
25783 680
50027 000
150001 600
635000 510
DOI: 10.1002/wcm
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
9
(a)
(b)
Fig. 5. The network-design problem and the design of multi-standard reconfigurable radios. (a) A simple network-design problem;
(b) The ‘network-design’ version of a section of the design graph. Solid arrows indicate ‘free’ pre-built ‘algorithmic roads’ that
take time to travel, but require no monetary expenditure.
507
508
509
510
511
512
513
514
regardless of the destination. For example we could
build a road from o to some intermediate point, A,
plus dedicated segments from A to each ti . Because the
segment o → A is used by all traffic, certain savings
may result (although this may lengthen the travel time
to all traffic).
By the same reasoning, building dedicated segments
from A to each ti may not be optimal. Perhaps we can
build a dedicated road A → t3 , but also a segment from
A to some intermediate point B, followed by segments
B → t1 and B → t2 . In the latter proposal, all traffic
would travel over the o → A segment, the t3 traffic
would go directly over A → t3 while both the traffic to
t1 and to t2 would continue over A → B, and from B
over B → t1 and B → t2 , respectively. There are many
other possibilities.
DOI: 10.1002/wcm
515
516
517
518
519
520
521
522
10
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
4.3. From a Hypergraph to a
Network-Design Problem
Our problem can be recast as a network-design
problem by proceeding as follows. The communication
standards correspond to terminals we wish to reach.
Building a direct road from the origin to each terminal
corresponds to the ‘Velcro’ solution: choosing a self-contained dedicated complex component to support
each standard. This will provide the ‘fastest routes’ to
be sure, but probably not the ideal solution. Installing
a self-contained component of ‘medium’ complexity
is the equivalent of building a road from the origin to
the intermediate point A in the example above. For
instance, Reference [1] shows that many important
tasks of a communication receiver can be implemented
through the FFT. In turn, the FFT functionality can be
implemented with a butterfly operator. Thus, we can
‘build a road’ from the origin to the FFT (i.e. install a
self-contained FFT component) to be shared by several
tasks. This will likely reduce the overall cost of the
design, although it will generally ‘increase the travel
time’. But we could also ‘reach’ the FFT node (i.e.
provide the functionality of the FFT operator), by first
‘building a road’ from the origin to the intermediate
junction ‘butterfly’ and from there utilise an ‘existing
road’ to the FFT (i.e. install a self-contained butterfly
component, and invoke it repeatedly to provide the FFT
functionality). Further explanation of the reformulation
is given below through examples.
4.4. Illustration of the Graph-to-Network
Conversion
Figure 5(b) shows the network-design problem,
corresponding to a section of the realistic hypergraph
of design choices given by Figure 3. The parts of the
graph that concern lowest level components and the
channeliser have been ignored for pedagogical reasons.
Recall that now the objective is to build a ‘road network’ that provides a path from the origin to each ‘terminal’. The solid arrows mark ‘free’ (pre-built) roads
that take time to travel, but require no monetary expenditure. These represent known algorithms which can be
used to provide a higher level functionality. For example from the FFT node there is a pre-built (free) ‘road’
to OFDM, another to equalisation and yet another to
the FIR node, because there are known algorithms that
allow the implementation of equalisation, OFDM and
FIR through the FFT operator [1]. The dashed arrows
indicate ‘possible roads’ that definitely cost money to
build (but possibly negligible time to travel). ‘Building
a road’ means installing a self-contained component
to provide the functionality pointed by the arrow. For
example there is a dashed arrow from the origin to FFT
(meaning we can provide the FFT functionality by installing a self-contained FFT component). There is also
a ‘possible road’ from the origin to butterfly. If we build
this ‘road’ (i.e. if we install a self-contained butterfly
component) we can ‘reach’ the FFT node via a free (algorithmic) path (shown by a solid arrow from butterfly
to FFT), because one can provide the FFT functionality
by repeatedly invoking a butterfly operator.
Figure 6 shows in greater detail the ‘network-design problem’ corresponding to Figure 3. For clarity,
the fact that the digital down conversion module can be
implemented through CORDIC, and the lowest level modules,
are not considered. Also, some obvious
dashed arrows from the origin (e.g. from the origin to
equaliser, meaning installing an equaliser component)
are omitted to reduce clutter. The main purpose
of Figure 6 is to show that representing the AND
dependencies is challenging in some cases, and may
require some ingenuity and additional research. The
FIR ‘triple node’ illustrates some of the main issues.
A ‘triple’ FIR node is used in Figure 6 to show
that there are three different ‘routes’ to get the FIR
functionality, and the route has an impact on what can
be done from there. The ‘sub-node’ FIR3 concerns
the case where the FFT is used to implement the
FIR. The ‘sub-node’ FIR2 refers to the case when
an FIR component is installed, even though the FFT
functionality is present (this may be done to reduce
execution time). FIR1 denotes a case in which an FIR
component is installed and the FFT functionality is
not available (perhaps OFDM and equaliser dedicated
components have been installed, and the FFT module
is unnecessary). In the cases FIR2 and FIR3 , the
functionality of the filter-bank module can be achieved
algorithmically, because both the FFT and the FIR
are available. However, under FIR1 , there is no
FFT module, thus, the filter-bank functionality would
require an installed component. For this reason, there
is no arrow pointing to the filter-bank module from
the FIR1 sub-node. On the other hand, the solid arrow
pointing to the equaliser from the FIR is drawn from the
oval, around the various FIR sub-nodes. This indicates
that we can achieve equalisation through the FIR,
irrespective of how we achieve the FIR functionality
(contrary to the filter-bank case).
Also shown in Figure 6 is a simpler case of
AND dependency, which involves achieving multi-rate filtering (MRateFilt) through both FIR and down
sampling (DSamp). The dashed line from the FIR oval
DOI: 10.1002/wcm
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
11
Fig. 6. Network view of the partial hypergraph given in Figure 3. Some low level connections not shown.
624
625
626
627
628
to DSamp, followed by a solid arrow from DSamp to
MRateFilt, shows that if we have the FIR functionality
(no matter how) and we install (‘build a road to’) a
down sampler, then we can achieve multi-rate filtering
‘for free’ (algorithmically).
629
4.5.
630
The reformulation of Section 4 provides us with several
well-studied algorithms. Of these algorithms, we shall
now identify and discuss some that seem particularly
promising.
Reference [9] considers a network-design problem
with two metrics: cost and ‘distance’. This fits well
with our formulation, with execution time as ‘distance’.
It follows the first approach discussed in Section 2.3,
that is it combines both cost and ‘distance’ into a
single objective. But this algorithm is probabilistic.
Reference [10] modifies it to make it deterministic,
utilising linear programming.
The basic idea of Reference [9] is as follows. The
algorithm has several stages. At each stage, it forms a
matching (‘pairing’) of terminal nodes (not previously
eliminated). Then, of each pair of nodes, it chooses
a ‘centre’. At this point, the non-centre nodes are
removed from further consideration and a new stage
of the algorithm starts (with a new matching among
the ‘centres’ only). Because for us, the terminals
correspond to standards to be supported by the radio,
they should be relatively few. Thus, this algorithm will
give an answer in a relatively small number of stages.
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
Possible Algorithms
For reasons given in Section 2.3, it may be preferable
to utilise an algorithm which minimises costs, while
keeping ‘length’ (time/latency) under a specified
budget. Reference [11] provides an algorithm that does
exactly that. However, this algorithm appears more
complex than the preceding ones.
Currently, we are evaluating these algorithms, while
continuing our exploration of the rich network-design
literature.
653
5.
662
Discussion and Outlook
We have built a mathematical model that enables
us to identify an architecture for a multi-standard
reconfigurable radio, which is optimal, in view of
both performance and cost. The chosen architecture
represents an optimal point between two extremes:
(i) highly complex communication components, each
exclusively dedicated to a given standard (‘Velcro’
approach), and (ii) very simple components to
be invoked by ‘higher layers’, in support of the
various standards. We represented the radio as
a hypergraph of progressively simpler functional
modules.
We have illustrated our approach through a
simplified ‘sub-design’. We do not claim that it
corresponds to a system available today, but it is
sufficiently close to reality to provide useful insights.
Our results agree with intuition. When the execution
time constraints, imposed by the supported standards
DOI: 10.1002/wcm
654
655
656
657
658
659
660
661
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
12
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
on the considered high-level modules are very tight (or
when the weight given to monetary cost in a combined
cost function is relatively low), the more complex
dedicated components are chosen. On the other hand,
if the time constraints are lenient (or if the weight given
to money in a combined cost function is relatively
high), money is saved by supporting the standards
through simpler (lower level) components, such as the
FFT, butterfly or even the CORDIC operator. The cost
and time figures we have used are, in our judgement,
reasonable and consistent. But we acknowledge that
obtaining better estimates of these values for realistic
designs is an important future task.
Our analysis also sheds some light on some of
the economic issues that may arise when a single
architecture targets several communication standards.
An optimal architecture that exclusively supports
standards oriented to ‘slow’ applications, such as
voice and text messaging, will favour lower cost,
simpler, reusable components. Thus, a consumer who
is primarily interested in such ‘traditional’ applications
will be better off by purchasing equipment that supports
only a few simple communication standards. In other
words, certain care must be taken in choosing the set
of standards to be supported by a single multi-standard
reconfigurable radio.
The algorithm chosen to find the optimal architecture
requires additional attention. In solving our ‘sub-design’ illustration, we adopted a ‘manual’ (spreadsheet) approach, while in previous work, we
briefly explored simulated annealing [6]. Both of these choices
have problems already discussed. In search of a better
algorithm, we have reformulated our design problem
as a ‘network-design problem’, which opens the door
to an extensive literature, offering many algorithms
and results. Indeed, we have already identified and
discussed some specific network-design algorithms
which seem particularly promising. While we have
shown simple examples of the transformation of
the hypergraph of design choices into a network-design problem, we have not formally proved that
every conceivable such hypergraph can be exactly
represented as a network-design problem. But even if
this representation is not possible in specific cases, by
ignoring certain non-essential connections, one may
obtain a network-design problem that is sufficiently
close to reality for its solution to provide a good
approximation to the optimal design (which perhaps
can then be revised manually). As an alternative, one
could study the inner workings of an appropriate
network-design algorithm and try to adapt it to the
problem at hand.
Several important relevant issues have been
neglected so far, or require additional attention.
For example, we have assumed that for every
functional module, there is exactly one component
which can provide the desired functionality at a
given performance level, and for a given cost. In
practice, for a given functionality (say the FFT
operation), the market may provide many alternatives,
each offering a different price/performance trade-off. Our formulation can be modified to consider
various components providing the same functionality.
Building the hypergraph of design choices is itself
an object of research. We have already pointed out
that researchers are trying to identify new operators
that may be common to several communication
blocks within a given standard, or across several
standards. The identification of any such operator
will lead to a modification of the graph. Likewise,
some researchers seek frequency-domain algorithms
that provide functionality that is typically provided
by time-domain procedures. The discovery of any
such algorithm will imply a modification of the graph
with new arcs, pointing to the FFT operator. Also, as
communication standards evolve, the graph may need
to be modified.
Other important issues requiring (additional)
attention include consideration of (i) the time needed
to re-configure the radio, while switching from one
standard to another, (ii) the ‘travel time’ of signals
from one component to another inside the radio, and
(iii) the possible contention among high-level modules
for the service of the same lower level module. This
contention may be particularly important if the radio
needs to support simultaneous communication over
several standards at the same time, such as, for example,
downloading a file through a wireless local area
network, while simultaneously video conferencing
through UMTS.
Our framework is easier to visualise if one thinks
of a ‘component’ as a physical entity (such as a
chip) that may or may not be installed. However, our
approach is more general than that. For instance, it can
accommodate a central processor-based architecture.
In a design based on a digital signal processor
(DSP), the components could be ‘objects’ (in the
object-oriented programming sense, as advocated in
Reference [16]). The cost of one object would
be its ‘fair market value’ if ‘outsourced’. The
basic trade-off between performance and cost would
remain. Presumably, primitive objects should be very
inexpensive to outsource or develop. But supporting all
communication tasks by invoking very simple objects
DOI: 10.1002/wcm
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
806
should likely lead to unacceptably slow designs. At
the other extreme, a highly optimised object that is
dedicated to support a standard should provide the
best performance, but require the highest monetary
expenditure, whether this object is outsourced or
developed ‘in house’. On the other hand, the trade-off between the processing power and the cost of the
DSP should also be considered in the optimisation,
which we have not directly addressed in the present
report. Besides cost and computing performance, other
factors such as the DSP’s energy consumption and heat
production may have to be considered as well.
Ultimately, we are well aware that we have raised
more questions than we have answered. Nevertheless,
finding for a multi-standard reconfigurable radio, an
architecture that is optimal in view of both performance
and cost is a very worthy cause. We believe that this
report establishes the general validity and usefulness of
our approach. Indeed, its greatest contribution might be
to attract the attention of other researchers whose work
may address some of the important questions we have
left unanswered.
807
Acknowledgement
808
809
Financial support by the ‘Région Bretagne’ of France
is gratefully acknowledged.
810
References
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
1. Palicot J, Roland C. FFT: a basic function for a reconfigurable
receiver. In Telecommunications. 10th International Conference
on, vol. 1, Papeete, Tahiti, pp. 898–902, 2003.
2. Heyne B, Goetze J. A pure CORDIC based FFT for reconfigurable digital signal processing. In 12th European Signal
Processing Conference, Vienna, Austria, September 2004.
3. Palicot J, Roland C, Louet Y, Al Ghouwayel A. A
new parameterization technique for reconfigurable radio:
the common operator approach, 2006. Technical Report
TR/SCEE/01.V1/2006. Supélec (Campus de Rennes): Cesson-Sévigné, France.
4. Moy C, Palicot J, Rodriguez V, Giri D. Optimal determination of
common operators for multi-standards software-defined radio.
In 4th Karlsruhe Workshop on Software Radios, Karlsruhe,
Germany, March 2006.
5. Rodriguez V, Moy C, Palicot J. An optimal architecture for a
multi-standard reconfigurable radio: Cost-minimising common
operators under latency constraints. In 15th IST Mobile and
Wireless Communications Summit, Myconos, Greece, June
2006.
6. Kirkpatrick S, Gelatt C, Jr., Vecchi M. Optimization by simulated
annealing. Science 1983; 220(4598): 671–680.
7. Rodriguez V, Moy C, Palicot J. An optimal architecture
for a multi-standard reconfigurable radio: a network theory
re-formulation. In Personal, Indoor and Mobile Radio
Communications. 16th IEEE International Symposium on,
Helsinki, Finland, September 2006.
13
8. Magnanti T, Wong R. Network design and transportation
planning: models and algorithms. Transportation Science, 1984;
18(1): 1–56.
9. Meyerson A, Munagala K, Plotkin S. Cost-distance: Two metric
network design. In Foundations of Computer Science. 41st
Annual Symposium on, Redondo Beach, CA, U.S.A., November,
pp. 624–630, 2000.
10. Chekuri C, Khanna S, Naor J. A deterministic algorithm for the
cost-distance problem. In 12th annual ACM-SIAM symposium
on discrete algorithms, Washington, DC, USA, January, pp. 232–
233, 2001.
11. Marathe MV, Ravi R, Sundaram R, Ravi SS, Rosenkrantz
DJ, Hunt HB. Bicriteria network design problems. Journal of
Algorithms 1998; 28: 142–171.
12. Al Ghouwayel A, Louët Y, Palicot J. A reconfigurable butterfly
architecture for Fourier and Fermat transforms. In 4th Karlsruhe
Workshop on Software Radios, Karlsruhe, Germany, March
2006.
13. Wang H, Cai T, Song J. Analysis of common structure and
software download for software defined radio. In International
Conference on Communication Technology, Beijing, China,
August, vol. 2, pp. 1112–1116, 2000.
14. Jondral F. Parametrization—A technique for SDR Implementation. In Software Defined Radio—Enabling Technologies,
Tuttlebee W (ed.). John Wiley & Sons: London, 2002; 233–256.
15. Rhiemeier A-R. Benefits and limits of parameterized channel
coding for software radio. In 2nd Karlsruhe Workshop on
Software Radios, Karlsruhe, Germany, March 2002.
16. Jha US. Object oriented HW functions accelerate communication, computing, and multimedia convergence. In Personal,
Indoor and Mobile Radio Communications. 16th IEEE
International Symposium on, Berlin, Germany, September 2005.
17. Tuttlebee W. Software-defined radio: Facets of a developing
technology. IEEE Personal Communications 1999; 6(2): 38–44.
18. Belzile J, Bernier S, Uhm M. Software radio modems enter the
SoC era. COTS Journal 2004; 6. http://www.cotsjournalonline.
com/home/article.php?id=100191.
19. Bidet E, Cardin J, Kouam M, Joanblanq C, Palicot J. FDF, a 512-TAP FIR filter using a mixed temporal-frequential approach. In
IEEE Custom Integrated Circuits Conference, Santa Clara, CA,
USA, pp. 173–176, 1995.
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
Authors’ Biographies
879
Virgilio Rodriguez is a Research
Associate with the Institut für Nachrichtentechnik, at the Universität Karlsruhe,
in Germany. He is involved with ultra
wide band (UWB) technology, as part of
the European project PULSERS. From
the Fall of 2005 through the summer of
2006, he was at Supélec (Campus de
Rennes), in France, where he applied
mathematical formalisms to achieve designs of multi-standard reconfigurable radios that are optimal in view
of both cost and performance. Before this, he spent a
year as a Research Fellow at the University of Surrey,
England, where he applied market principles to the dynamic
management of the radio spectrum, as part of the End-to-End
Reconfigurability (E²R) European project. He received his
PhD degree in May 2004, from the Polytechnic University
(Brooklyn, New York), where he focused on analytical
studies for radio resource management in the context of third-generation cellular communication networks. Previously, he
received Master’s degrees from both Purdue University and
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
DOI: 10.1002/wcm
14
901
902
the Virginia Polytechnic Institute, for work, respectively, in
electrical and systems engineering.
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
Christophe Moy is an engineer of the
INSA (National Institute of Applied
Sciences), Rennes, France, 1995. He
received his M.Sc. and Ph.D. degrees in
Electronics in 1995 and 1999 from the
INSA. He worked from 1995 to 1999 on
spread spectrum and RAKE receivers
for the IETR. He then worked 6 years at
Mitsubishi Electric ITE-TCL research
lab where he was focusing on Software Radio systems and
concepts, including digital signal processing, HW and SW
architecture, co-design methodology and reconfiguration.
He also addressed the design of SDR impulse UWB systems.
He represented Mitsubishi Electric at the SDR Forum and
worked on the French research program A3S, as well as the
IST project E²R phase 1. He is now an Associate Professor at
Supélec, campus of Rennes, France. His research is done in
the SCEE laboratory, specialized in software and cognitive
radio, which is part of the IETR. He addresses heterogeneous
architectures design and reconfiguration management for
SDR and cognitive radio. He participates in the IST Network
of Excellence Newcom, the IST project E²R phase 2, as well
as the French research program IDROMel.
925
Jacques Palicot received the Ph.D.
degree in signal processing from the
University of Rennes, France, in 1983.
Since 1988, he has been involved in
studies about equalization techniques,
applied to digital transmissions and new
analog TV systems. Since 1991, he
has been involved mainly in studies
concerning the digital communications
area and automatic measurement techniques. He has taken
an active part in various international bodies (EBU, CCIR),
and in European projects. He has published various scientific
articles, notably on equalization techniques, echo cancellation, neural techniques and hierarchical modulations. He is
currently involved in adaptive signal processing and in new
techniques such as software radio and Cognitive Radio techniques.
Since October 2003, he has been with Supelec, Campus of
Rennes, where he is a Professor and head of the Signal,
Communication & Embedded Electronics research team.
DOI: 10.1002/wcm
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
'-
504
$
"#0 -
-
,*
.$
,$
# #$
03
1$ $
!
9
< 9 + +
6
2
*9
&
,9 >
)
+ >
2 6
&
"
1 , - 4
2
6 A
9
$
"
+
c (:d 3
A
c (:d
<
6 (
< 2
>6
>
>6
5
>
-1 5
,
c?d
K
9
&
,
>&
2
(
>&
, '<
2
/
6
>&
6
K
2
<
6
A9
&
9
@
K
B
6
+
6
2
>
T0 3#0
>
c (:d
>6
"
9
(
A
2
>
/ ! 6
2
9
>
2
&
6
<
&
*
'- "
>( 6
c (@d 5
69 (
6 A9
,9 >
1
%
>&
'-
" +.! &
,
9
1 "
,
6
0
/<
(
6
9
2
A
6
>
9
<
K
6
Mode 1
Mode 2
Mode n
) .! G M
$!% < & &
$
&
!.
&
&
% ." %
%3
1
&"! % 7! ! %7!
(
" +.! &
&
%&
5 & &
&!0 : /$
% " &N
3
1 .&!" 5
<
<
6
2
<
+
+
+
>
6
<
>6 <
<
c (Bd
>
"
9
92
A
"
$!% <
K
&
, '< 0 '#0.
B=
'-
$
"#0 -
,
/
504
9
-
6
9 >
26
=<
,=g 5
6
+ K
%
>
9
,
>
>
)
<
2
.$
,$
# #$
03
,
,=g 5
O6
"$
'%
&
'- "
'
< 69
%
&
2
2
<
$
9
'-
6
2
>6
)<
/ !
>
2
K
>
1 "
<
%
<
1$ $
!
1"
/ !
9
K
6
(
T
9
,
2
<
)
=
)<
9
6
6
= 1
2
9
<
9
>
Global manager
Standard parameter control
Dispatches orders to lower layers
Config. Manager
L1_ReM
Standard Set
Config. Management Level 2
L2_ReMUs :
–
–
–
Function Level
Independent of the HW
Manages several elementary PBprocessing blocks of lower granul.
Processing blocks configuration
Embedded very closely to the PB
Dedicated to the nature of
reconfigurable resources
) .!
HM
Function Set
Function Set
L3_ReMU L3_ReMU
Block Set Block Set
"
+.! &
1*
>
2
. ,
6
%, g 5)
9
. '
<
2
6
K
9
9
,
<
,=g 5
>6
D >
<
K
>
/ !
>
> ,
B;
' &!0
9
.
9
…L3_ReMU
Block Set
&
92
L2_ReMU
…Function
Set
L2_ReMU L2_ReMU L2_ReMU
Function Set
L3_ReMUs :
–
–
–
$
L1_ReM :
–
–
–
9
96
6
9
6
O
, g 5 <
, g 5
(O
D
6
&
9
0
2
9
K
"
, '< 0 '#0.
'
1"
9
&
'-
504
$
"#0 -
-
<
A
9 >
!
=
/ !
-1 5
K
6
2
'
+
<
/ !
&
J
1 "
'-
69 &
) 9
9
K
6
,
1$ $
!
96
%
6
< 1"
.$
,$
# #$
03
19Q
92
<
K
9
&
>
&
<
J
1"
<
6
>
/ !
6
'- "
K
%
)
2
/ !
A
B
1 , - 4
, /
>&
? 2
9 A
D
6
E
2+ <
:B i
9
2
9
6 6
E
+ >
,
(
/ !
1
92
*
9
6
>
,
=@ X
,
+
2
2R
E
2+;
9
L
+
'
*
GUI
Host PC
FPGA
SystemRec_REQ
PC
(host)
FPGA
DSP
.out
.bit
.bit
DSP
new
µBlaze
SystemRec_ACK
"
( )
data
generator
wire
display
FIR
Reconf.
manager
MMI
FIR
reconfig.
controller
mapping
GUI
Host PC
mapping
reconfig.
controller
GUI
Host PC
PartialRec_REQ
FIR
)
µBlaze FPGA
REQ(partial)
ql
.bit_partial
.bit_partial
DSP
PartialRec_ACK
PartialRec_REQ
MAP
µBlaze FPGA
REQ(param)
ql
DSP
REQ(param)
ql
PartialRec_ACK
PartialRec_ACK
IP
!"
#
"
$%
&
!"
#
"
) .!
$%
"
IM
$%
#$
&
"
&
!
3 /T-
$
*
%&!
1 ,0 5
6
>
Reconfiguration duration
832 kB
16,6 ms
FIR reconfiguration
35 kB
670 µs
$%
'
9
Bitstream size
FPGA total reconfiguration
(
+
#'
(44G
9
2
c ( d
c (?d
c ( d
+
9 >
9
>
6
"
96
>
W
"
, '< 0 '#0.
9
K
,
6
'
B@
'-
504
$
"#0 -
>
81
'
6
2
$
9
(
5
>
>
6
\
>
>
;
.$
,$
# #$
03
>
%
<
<
( >
J
72
92
&
6
6
2
>
<
'- "
6
,9
9
'
'-
9
6
&
6
>
> 2
1 "
>
9
;
6
1$ $
!
2
>&
"
N) '
@
*
+ >
')2 %
+
& <
c (Bd ,
-
< A9
>
9
1 , - 4
"
' )
_5
1(
_(
D
2
9
69
@
/ !
'
2
/ !
B
/ !
]
1(
"
'
3
45
C
^
'2,
.
2
1 , - 4<
3 C X
2
B:
+
'>
)
>
"
$
H
2 %
"
3 /TG / !
6
'
%# #")
, '< 0 '#0.
>&
*
*
+ >
1 ,0 5
2
,
MANAGING DYNAMIC PARTIAL RECONFIGURATION ON
HETEROGENEOUS SDR PLATFORMS
Jean-Philippe DELAHAYE, Christophe MOY, Pierre LERAY, Jacques PALICOT
IETR/Supelec-SCEE Laboratory, Campus of Rennes, France
{jean-philippe.delahaye, christophe.moy, pierre.leray, jacques.palicot}@supelec.fr
ABSTRACT
This paper deals with partial reconfiguration issues on
heterogeneous prototyping platforms (DSP/ FPGA). An
analysis of multi-standard physical layer applications in
terms of reconfiguration needs permits to extract several use
cases of reconfiguration in a multi-standard handset:
standard switching, mode switching, bug fixing, etc. The
main difference between these schemes of reconfiguration is
the level of granularity of the reconfiguration. We argue that
a configuration manager needs to handle this multi-granularity of reconfiguration to optimize the change of
context (either reconfigurable hardware or programmable
software processing components) in terms of size of code,
time to reconfigure, etc. The analysis also helps to
determine the accurate level of flexibility needed through
the reconfigurable architecture at different scales. Within
this framework partial reconfiguration is an essential feature
for optimizing the reconfiguration inside FPGA. We aim
indeed at providing reconfiguration adequacy between reconfigurability capabilities of hardware resources and
reconfiguration needs of Software Defined Radio
applications. Architectural solutions are proposed to
implement partial reconfiguration on existing hardware
combining DSP and FPGA.
1.
INTRODUCTION
SDR is expected to be the most appropriate answer to future
multi-standards handsets design challenges [1]. We can
predict that SDR systems will be heterogeneous in terms of
computing resources, in order to deal with a wide variety of
radio applications. This implies many research activities in
the fields of multi-processing and heterogeneous computing.
All the more so as dynamic reconfiguration is involved. But
even if solutions exist for DSP [2] where it is more natural
to support reconfiguration, it is less common in the field of
FPGA. We propose here a combined approach, from the
application side to the implementation on the platform, that
particularly suits FPGA. The idea is based on the
following major point: the reconfiguration requests implied
by SDR applications will have multiple levels of granularity.
Taking into account these granularity levels will be of
particular importance to manage efficient and speedy
reconfiguration at all levels of the system. The functional
architecture proposed here takes into account application
needs depending on the granularity of the reconfiguration.
This functional architecture allows to extract as much as
possible the flexibility offered by any reconfigurable
heterogeneous platform. This is straightforward for
processor-based systems and particularly suits FPGA
despite FPGA's limitations in terms of dynamic
reconfiguration [3]. Whereas the full flexibility of our
functional architecture is not reached in an implementation
using current FPGA (limitation of the column-based
configuration memory structure [4] of Xilinx FPGA), it
allows to obtain the best of their reconfiguration ability.
The paper is organized as follows. The next part is concerned
with an algorithms analysis of a multi-standard transmitter
including baseband processing functions of GSM, UMTS
UTRA/TDD, and 802.11g OFDM mode. Configuration
management needs are deduced. According to the fact that
the flexibility is strongly related to granularity, part 3
proposes a hierarchical model of configuration management
which allows to handle the multi-granularity of
reconfiguration. Part 4 presents the resulting functional
hierarchical architecture deduced from the previous point. A
concrete implementation is detailed in part 5, insisting on
the FPGA dynamic reconfiguration aspects.
2.
APPLICATION ANALYSIS AND
CONFIGURATION NEEDS FOR
CONFIGURATION MANAGEMENT
Basically, the Software Radio aims at providing multi-standards communications accesses using a hardware
platform based on shared processing resources. The
Software Radio hardware requires to be reconfigurable to be
able to switch from one processing mode to another. It has
also to be heterogeneous to face the wide variety of
processing categories. It implies that the number of
configuration contexts to manage grows as the number of
standards increases multiplying the number of different
processing algorithms. The number of configurations also
depends on the type of reconfigurable resources of the
hardware platform. First, it is necessary to do a functional
baseband analysis to classify each baseband function in
order to reduce overall configuration management
complexity. Having in mind the optimization of the
configuration management complexity, the classification of
the baseband functions in terms of parameters, needs of
hardware resources and needs of flexibility aims at reducing
the number of configuration contexts. The second step of
the application analysis is about the classification of the
context switching cases which determines the granularity of
a reconfiguration.
2.1. Multi-standards baseband functions analysis
The multi-standard functional analysis starts with a first
study of the baseband functions of the uplink transmitter of
three standards (GSM 900, UMTS UTRA/FDD, WLAN
802.11g mode). These standards have been chosen for their
wide variety of baseband signal processing. GSM is a
classical mono-carrier TDMA system, UMTS uses CDMA
techniques, and finally 802.11g offers a multi-carrier mode
(OFDM). Such a multi-standards transmitter involves all of
the conceptual challenges, to define a configuration
management architecture. We group the multi-standards
baseband functions into three functional classes presented
below. Gathering functions into classes highly favors the
opportunity to use common operators [5] that can address
several processing roles by a simple change of parameters.
Fig 1: 3 Standard Baseband Functions Classification and the
computing demand resources
The Coding Class includes functions like cyclic coding,
convolutional and turbo coding. Standards use a wide range
of coding schemata. Despite the extensive set of parameters
to handle the coding functions, these are generally based on
few operators as 1-bit linear shift register and modulo-2
adders. A high flexibility of the processing resources is
mandatory to reuse efficiently the common operators.
The Data Handling Class puts together the functions which
manipulate data packets. Due to the nature of functions
(concatenation, segmentation, multiplexing, etc.), the data
packet lengths are very different. These functions are
exclusively dedicated to data transfers. This class of
algorithms is largely control-oriented. It mainly requires a
lot of memory resources and flexibility to process different
kinds of data.
The Modulation Class corresponds to the baseband
functions which take place before transposition on
frequency carrier. In this class, the functions are
computation-oriented and data are often oversampled. High
throughput is needed by the filtering functions.
The general goal in each class of processing is to group
similar processing to maximize the reuse of hardware.
Grouping functions into classes helps when dealing with
configuration management. Despite grouping the functions
in three classes, parameters from one function to another are
quite different. This is the reason why we associated to each
function a specific Configuration Management Unit (CMU).
Our configuration management
architecture is explained in more depth in the next section.
2.2. The reconfiguration needs of multi-standards
applications.
The general goal of our studies is to reduce the configuration
management complexity as well as the configuration overload by
minimizing the resources to reconfigure during any context
switching. Many design considerations are involved to
optimize the reconfiguration such as the architecture, the
design methodologies or the parameterization studies, with a
common goal: to enhance the reuse of the processing blocks
[6]. In this part, we discuss the multi-standards
application switching needs and afterwards we lean on this
analysis to propose our configuration management
architecture. The different types of application switches that
we define are the following:
Standard Switching: The standard switch is the most
demanding one. The transmitting chains of two standards
are hugely different. Almost all the baseband processing
has to be reconfigured. Moreover, a standard switch not
only involves the physical layer but also the other higher
layers of the protocol stack.
Mode Switching: A mode switch is considered to be an
intra-standard context switching. Some standards define
several functional modes. In most of the cases, the mode
switch does not include changes of the higher level layer
and the mode parameters are often defined by the MAC
Layer. For instance, 802.11g is defined with several types
of mode: DSSS, FHSS, OFDM. So in the case of a mode
switch, reconfigurations are lighter than in a standard switch
and it is not worth performing the same huge
reconfiguration.
Service Switching: A service switch on the physical layer
remains an intra-standard switch, unlike a service switch
at the application level, which could imply a standard switch.
A service switch could be a request to increase the data
rate. For instance, in 802.11g OFDM mode, a request to
change the data rate from 6 Mbit/s to 54 Mbit/s implies
modifying at least the mapping function from BPSK to 64-QAM. So the service switch could be performed with
parameterization and reconfiguration of some of the
baseband functions. Then, a partial reconfiguration of the
transmitting chain is generally sufficient.
Performance enhancement, bug fixing:
The ability to reconfigure small parts of a processing chain
is necessary to allow some performance enhancement or
bug fixing when the system is already in use. The
possibility to change any processing patterns in chain is
closely related to design methodologies which have been
used during the definition of the processing chain
architecture and the possibilities of the hardware resources
to change such small parts of a given function. This type of
reconfiguration has to be performed without any
discontinuity of service for the user.
Actually, the different kinds of context switching detailed
above should be supported in an SDR system. The given
configuration management architecture has to be able to
manage the different granularity levels of configuration to
optimize the reconfiguration. We propose, in the next
section, a hierarchical configuration management
architecture that will efficiently enable the handling of the
multi-granularity of configuration.
3.
HIERARCHICAL CONFIGURATION
MANAGEMENT MODELLING
3.1. Config-Data Path Model
The communication applications are dataflow oriented, then
our approach, detailed in [7], is based on a data path model.
The functions of the SDR transmitting chain are mapped
into several Processing Block Units (PBU). Following this
approach, each PBU is optimized using specific
reconfigurable hardware resources.
In addition, a configuration path, also split into several
Configuration Manager Units (CMU), controls the
reconfigurable processing path. Each CMU, dedicated to a
type of PBUs, manages the configuration of a type of
baseband function in the chain. The split configuration path
offers the possibility to partially reconfigure the transmitting
chain by an independent reconfiguration of each PBU. The
distributed configuration management approach also
decreases its design complexity.
3.2. Hierarchical Configuration Management
Architecture
The section above presented the benefits of the
configuration datapath model. It is a base to manage the
heterogeneity of baseband processing functions and enable
the partial reconfiguration of the SDR transmitting chain.
The hierarchical configuration management model
illustrated in Figure 2 is based on the config-data path
approach. This model is necessary to manage the multi-granularity of configuration required by the different context
switching cases detailed in section 2.2. It is composed of three
levels of hierarchy that are detailed below.
Figure 2: Hierarchical Configuration Management Model
Hierarchical level 1
The first level is split into 3 entities following the functional
classification described in Fig 1. This first high level
classification allows a control of category-specific functions
to manage parameters at the standard level. The
Configuration Manager L1 (L1_CM) works at the standard
level as host toward the underlying levels of management.
This entity is in charge of choosing the functional units
which will constitute the entire configuration of the
baseband processing chain. At this level, generic functions
are handled as generic components. No hardware
implementation is considered yet.
The generic functions selected at level 1 are parameterized
at level 2 in accordance with standard specifications. The set
of attributes of each function is handled by the
Configuration Manager Unit L2 (L2_CMU) to create each
functional context of the entire processing chain. For
example, the generic function "bit to symbol mapping" of
the modulation class is set at level 2 to fit the selected
standard. In the case of 802.11g standard this setting could
be BPSK, QPSK, 16-QAM, 64-QAM constellation. In the
case of a mode switch, the needed reconfiguration
directly involves the L2_CMUs concerned.
The processing data path architecture of this third level will
depend on the reconfigurable computing resources of the
hardware architecture. The complete processing path could
be formed by different types of reconfigurable resources in
accordance to the studies detailed in the first section. It
could correspond to configurable accelerators, array of DSP
blocks or a fine grain reconfigurable data path. The main
task of the (L3_CMUs) in the configuration path is to find
the available processing resources and configure them to
enable the execution of the functional context created at the
above level.
4.
HIERARCHICAL FUNCTIONAL
ARCHITECTURE
This section presents a functional hardware architecture
made up of three main reconfigurable clusters. After an
overview of this heterogeneous architecture, each cluster is
described in further details.
This architecture fits the hierarchical configuration
management considerations, proposed in the previous
section. It allows first to answer the needs of handling the
multi-granularity of the context switching. Second, the
hardware architecture is separated into three main hardware
clusters. The cluster resources are specialized according to
the classification of the baseband functions into three
classes presented in section 2.1 and required performances.
These three clusters have the same general architecture: a
cluster is designed around a dedicated central processor with
separated data bus and program bus (as a typical Harvard
architecture). This allows to distinguish the configuration
data path from the processing data path. The processor unit
of the L1_CM controls the three clusters to work together.
The L1_CM also manages the whole application
parameters.
The configurations of each cluster are controlled by the
L2_CM processor units. Each L2_CM is interfaced to a
processor designated as master. In addition to this master
processing unit, some other reconfigurable units are used to
speed up the data processing of the cluster. These
reconfigurable accelerators are varied, as the baseband
functions have different needs. So an L3_CM is associated
with each different reconfigurable accelerator.
Coding Cluster: Software implementations, for the
functions of the coding class, are possible. But hardware
will be more efficient and will consume fewer resources.
Since data width is only one bit for this class of functions,
software implementations are generally under-optimized
because they use 16/32-bit processors. Furthermore,
hardware implementation is necessary to reach the expected
high throughput performances of 54Mbps in 802.11g. On
the other hand, facing the wide variety of coding schemes,
software flexibility (DSP) is mandatory. Consequently, the
coding cluster is composed of a DSP aided by reconfigurable
co-accelerators: a DSP accelerated through efficient dedicated
coprocessors corresponds to a suitable architecture for this
class of processing functions.
Data Handling Cluster: The high flexibility offered by the
GPP processors is attractive for running the wide variety of
handling functions of every standard. The data handling
functions almost perform only memory transfers, so a DMA
interface is useful between the processor and the memory.
The memory is a sizable array of SRAM blocks; this allows
the array to be resized by switching off the unused memory
blocks, depending on the handling function. Power saving
architecture design, for this class of functions, is a key
factor, as memory consumes a lot of power.
Modulation Cluster: Most functions handle data, up to 32
bits, which takes a lot of resources. Consequently, some of
these functions, like pulse shaping filtering will take
advantage of an implementation in dedicated configurable
hardware accelerators. One other point is the intensive
processing requirements of the Modulation class functions
which often necessitates a HW implementation.
The main goal of this model is to help define the features
needed by the hardware platform for SDR applications.
Fig. 3 depicts the model as an architecture with lots of
duplicated resources (multiple microprocessors and buses).
This just illustrates, for a good understanding, our
hierarchical view of the hardware platform.
Fig 3: Hierarchical functional architecture
For instance, in the CM part of the model, the multiple
microprocessors (called µP) are instantiated as multiple
tasks running on a processor of the hardware platform.
The next section illustrates in a more detailed manner the
effective implementation of this functional architecture in a
real platform.
5.
HETEROGENEOUS HARDWARE SDR
PLATFORM
The use of reprogrammable (DSP), reconfigurable (FPGA)
devices [8] and more generally reconfigurable computing
architectures is commonly admitted for Software Radio. We
present the heterogeneous (GPP/DSP/FPGA) platform and
especially FPGA reconfiguration requirements to enable the
implementation of our model. We focus on the FPGA,
because it is foreseen as a good intermediate architectural
possibility between DSP and ASIC. FPGA combines the
flexibility of DSP and the throughput efficiency [9] of
ASIC. But this really makes sense only if we can efficiently
manage reconfiguration of FPGA. Mandatory requirements
are partial and dynamic capabilities of reconfiguration for
FPGA. Currently, a few FPGA devices enable dynamic and
partial reconfiguration [10]. Moreover, the system
architecture [11], [12] is a key factor in embedding
these kinds of reconfiguration.
5.1 From the Architecture Model to the platform.
The implementations on the platform are currently under
development. We present in this part the platform and the
drawn up implementations.
Our prototyping platform is composed of a GPP, a DSP, a
FPGA and different types of memories. The functional
architecture of the Fig 3 is mapped into this platform as it is
illustrated in the Fig. 4. It consists in gathering on each
category of HW processing device (GPP, DSP, FPGA) on
the one hand the PBU (in white on Fig. 4) and on the other
hand the CMU depending on their hierarchical level.
configuration of the functions that run on the cluster
resources.
The FPGA of our platform is a Virtex -II device from
Xilinx. The following components of the functional model:
the hardware accelerators, the small co-accelerators and the
sizable array of blockRAMs are mapped into the FPGAs.
The partial reconfigurability is of course a mandatory
feature to allow reconfiguration of a single component. The
L3_CMUs responsible for the configuration of the co-accelerators are implemented as tasks on the µBlaze soft
processor. It allows to perform fine grain reconfiguration of
the FPGA without involving any external resources. Next
section details furthermore the internal architecture of the
FPGA to enable the partial dynamic reconfiguration.
Components Library: The Hardware and software designs
of the processing functions are stored in the external storage
memory of the platform where the configuration
management takes them.
5.2 FPGA architecture on the embedded platform.
At the initial stage the CM of the Host (GPP) downloads the
DSP boot program that includes in its data memory the
initial full configuration of the FPGA. It consists of the
FPGA design architecture. It includes an internal
configuration controller (µBlaze soft processor), the internal
reconfiguration interface (ICAP), the initial instantiations of
PBUs and the communication interfaces with the DSP.
Fig. 4: Platform architecture
The GPP and the external storage memories are resources
used from a standard PC station. The GPP is the host of the
rest of the platform. The L1_CM is a task running on the
GPP which controls the configuration of the other platform
resources (DSP, FPGA). The control-oriented functions, such as
some Data Handling functions, take advantage of the
flexibility of the GPP.
The DSP is a C64 from TI. The DSP parts of both the
Modulation and Coding Clusters are tasks running
on the C64. The DSP works as the master of the
(DSP/FPGA) subpart of the platform. The L2_CMUs
of the three clusters are therefore also mapped on the DSP. The
position of master allows the DSP to manage the overall
Fig. 5: Details of the FPGA architecture
The Fig. 5 illustrates this platform architecture with details
of the internal FPGA design that enable two types of
dynamic partial reconfiguration depending on their
granularity level. One stays internal in case of limited-scale
reconfiguration (for co-accelerators configuration) or design
parameterization (auto-reconfiguration). This implies to
interconnect the µBlaze to the ICAP internal configuration
interface. This kind of reconfiguration of the FPGA by a
processor (µBlaze) embedded in the FPGA is called self-
reconfiguration or auto-reconfiguration [13]. In this case,
small partial bitstreams are stored inside the FPGA, and the
use of auto-reconfiguration leaves the other HW resources of
the platform free. At a larger scale, the reconfiguration of the HW
Accelerator is external. This implies interconnecting
the DSP to the external SelectMap or internal
ICAP reconfiguration interfaces. The bitstreams
corresponding to the design of the HW Acc. are stored in an
external SRAM memory.
6.
CONCLUSION
Through the proposed models, this paper shows the tradeoffs to provide a full reconfiguration flexibility with a
reasonable hardware complexity from configuration
management to the hardware architecture. Actually, the
hierarchical functional architecture offers the maximum of
the reconfiguration flexibility. It allows to take as much
flexibility as possible from any kind of heterogeneous
platforms. As this functional architecture is not specific to
any platform or device, it will improve itself and will easily
take advantages of the future evolutions of reconfigurable
computing technologies in terms of flexibility. Following
this idea we have already managed to take into account existing
limitations concerning the partial reconfiguration of FPGA.
7.
REFERENCES
[1] J. Mitola, "The software Radio architecture," IEEE Comms
Mag, vol. 33, no. 5, pp. 26--38, May 1995.
[2] C. Moy, A. Kountouris, A. Bisiaux, "HW and SW
Architectures for Over-The-Air Dynamic Reconfiguration by
Software Download," SDR Workshop of the IEEE Radio and
Wireless Conference, Boston, USA, Aug. 2003
[3] J.P. Delahaye, G. Gogniat, C. Roland, P. Bomel, "Software
Radio and Dynamic Reconfiguration on a DSP/FPGA
Platform," 3rd Karlsruhe Workshop on Software Radios, proc.
pp 143-151, Karlsruhe Germany, March 17-18 2004.
[4] "Virtex Series Configuration Architecture User Guide,"
Xilinx, Inc., 2100 Logic Drive, San Jose, CA
95124, XAPP151 (v1.6) March 24, 2003
http://www.xilinx.com/bvdocs/appnotes/xapp151.pdf.
[5] J. Palicot, C. Roland, "FFT a basic Function for a
Reconfigurable receiver," ICT Conf., Tahiti, 2003.
[6] DeHon, J. Adams, M. DeLorimier, N. Kapre, Y. Matsuda, H.
Naeimi, M. Vanier, and M. Wrighton., "Design Patterns for
Reconfigurable Computing," fccm, pp. 13-23, 12th Annual
IEEE Symposium on Field-Programmable Custom
Computing Machines (FCCM'04), 2004.
[7] J.-P. Delahaye, J. Palicot, P. Leray, "A Hierarchical Modeling
Approach in Software Defined Radio System Design," SIPS
2005, Athens-Greece, Nov. 2005.
[8] M. Cummings, S. Haruyama, "FPGA in the Software Radio,"
IEEE Comms. Mag., vol. 37, no. 2, pp. 108-112, Feb. 1999.
[9] T. Claasen, "High Speed: Not the Only Way to Exploit the
Intrinsic Computational Power of Silicon," Digest of Tech.
Papers ISSCC 99, IEEE Press, pp. 22-25, 1999.
[10] S. Donthi and R.L. Haggard, "A survey of dynamically
reconfigurable FPGA devices," Proc. IEEE Symposium on
System Theory, pp. 422-426, 2003.
[11] M. Ullmann, M. Huebner, B. Grimm, J. Becker; "An FPGA
run-time system for dynamical on-demand reconfiguration,"
18th International Parallel and Distributed Processing
Symposium, 2004. Proceedings, p. 135, 26-30 April 2004.
[12] J.C. Ferreira, M. M. Silva, "Run-time reconfiguration support
for FPGAs with embedded CPUs: The hardware layer," Proc.
of the 19th IEEE International Parallel and Distributed
Processing Symposium (IPDPS’05).
[13] B. Blodget, P. James-Roxby, E. Keller, S. McMillan and P.
Sundararajan. A Self-reconfiguration Platform. In proceeding
of 13th International Conference on Field- Programmable
Logic and Applications, FPL’2003, pp. 565-574. Sept. 2003,
Lisbon, Portugal.
Partial Reconfiguration of FPGAs for Dynamical
Reconfiguration of a Software Radio Platform
Jean-Philippe DELAHAYE, Jacques PALICOT, Christophe MOY, Pierre LERAY
IETR/Supélec - Campus de Rennes
Avenue de la Boulaie, CS 47601, 35576 Cesson-Sévigné CEDEX, France
phone: + (33) 2 99 84 45 00, fax: +(33) 2 99 84 45 99
Email:<jean-philippe.delahaye,jacques.palicot,christophe.moy,pierre.leray>@rennes.supelec.fr
Abstract— In the context of a Cognitive Radio Terminal we
propose to use Partial Reconfiguration of FPGAs in order to
obtain an enhanced wide band Software Radio Platform. That
means the reconfiguration should be performed dynamically
(during the run time). Only Partial Reconfiguration of FPGAs
could currently meet this requirement both in terms of power
computing efficiency and reconfiguration speed.
In this paper we show how it is possible to perform this type
of behavior within a heterogeneous platform. We consider in
detail three different situations illustrated with the three following
examples: the dynamical reconfiguration of a convolutional coder,
a constellation mapper, and a FIR filter.
I. INTRODUCTION
Within the framework of the European IST End to End
Reconfigurability phase two (E²R II) project, we propose to
perform dynamic reconfiguration using FPGAs. This behavior
is included in a complete baseband platform developed by
other partners of the project.
New wireless transceivers [3] based on the Cognitive Radio
(CR) concepts [1] will be feasible when the wideband true
Software Radio Technology [2] becomes available. This implies
a lot of signal processing performed in real time. Another
important requirement is that some functions should run during
the reconfiguration of other functions. Dynamically reconfigurable FPGAs can provide this feature because they offer a
new design space with a variety of benefits, among others:
flexibility and reusability at run time. The dynamic reconfiguration is closely related to partial reconfigurability of FPGA
[12]. Indeed, the partial reconfigurability allows to selectively
change segments of a FPGA functionality without suspending
operations of the remaining parts. There are several benefits for
partial reconfiguration (PR): it reduces the configuration time
and saves storage memory as the partial reconfiguration files
(bitstreams) are smaller than full ones. It may limit also the
overhead and the bandwidth spending for code downloading
in case of OTAR. Reconfigurable computing [4], [5] has been
proposed in a large range of signal processing applications in
order to improve high performance, flexibility and adaptability.
The development of wireless communication systems has
revealed the need to dynamically adapt systems architectures
at the hardware level, as in Software Radio (SWR) system [6].
[7] proposed a first vision of SWR transceiver using dynamical
reconfiguration. PR on a Virtex-E FPGA was described in this
work. The PR has been extended at the system level for the
Fig. 1.
Different types of dynamic reconfiguration.
heterogeneous SWR platform in [10]. In this paper we are
going further, describing several implementations of partially
reconfigurable processing modules using the partial reconfiguration capability of the Virtex-4. The system architecture that
should be adopted to achieve the PR is also considered here.
Furthermore, in this paper we address new PR capabilities of
the Virtex-4 FPGA, compared with Virtex-II. They provide
more flexibility and enhanced reconfiguration performances.
The dynamic reconfiguration is used to share a group of
configurable elements between several processing contexts.
This is a resource multiplexing in time. We can distinguish two
types of dynamic reconfiguration which are illustrated in Fig.
1. Figure 1.1 shows the case of the dynamic reconfiguration
with data dependency between the functions T2 and T3 that
share the same reconfigurable resource area. In this case, it
is necessary to ensure the data temporary storage. Figure
1.2 shows the use of dynamic reconfiguration without data
dependency between two functions. For example, the second
case could occur when the transmission environment changes
[11], (signal-to-noise ratio, available power, etc.), and when
a more efficient function or a less power consuming one is
required. This situation occurs classically in Cognitive Radio
systems. The first case may happen when processing blocks of
a large function are multiplexed. This is useful to reduce the
area of the function when needed. We have presented in [13]
an application that takes advantage of the first type of dynamic
reconfiguration. We focus in this paper on the second type of
dynamic reconfiguration.
The rest of the paper is organized as follows. The state-of-
the-art of reconfiguration of SWR platforms is given in section
II. The need of partial reconfiguration is also highlighted.
Section III deals with the management of Partial Reconfiguration. Section IV describes implementation alternatives through
three examples of PR modules. In this section the general
architecture of a PR module is also provided. The conclusions
are stated in section V.
II. STATE OF THE ART OF THE PARTIAL
RECONFIGURATION
A. System Architecture for the achievement of Reconfiguration
Several alternatives of system architectures are already
possible to ensure the dynamic partial reconfiguration of FPGAs.
Depending on the FPGA reconfiguration capabilities, reconfiguration can be done in two different ways: the external or the
internal reconfiguration. It depends on both the reconfiguration interface and reconfiguration controller available on the
platform. An overview of the corresponding architectures is
proposed in Fig.2. In Fig.2.1 the configuration controller is
an external processor connected to the external configuration
interface of the FPGA: the SelectMap port. In the second
architecture, both the controller and the interface are internal.
The configuration controller is an embedded processor, and
the configuration interface is the Internal Configuration Access
Port (ICAP). In both cases, the logic resources are separated
into two types of areas, the fixed logic and the dynamically
reconfigurable area. Each of these two parts may contain
several modules. On the one hand the fixed area contains
modules whose behavior never changes. On the other hand
the reconfigurable modules should be designed in a column
(rectangular area) as the reconfiguration on Virtex-II is done
by reloading entire columns of CLB. The interconnection of
a reconfigurable module with another reconfigurable or fixed
module has to be realized through a special communication
interface called Bus Macro. These Bus Macros provided by
Xilinx are used to ensure the right placement and routing of signals
crossing over the PR area as explained in [7].
The FPGAs are based on an SRAM Configuration Memory
(CM) (or configuration plane). This is the plane usually used to
configure the FPGA at boot time. The CM of the Virtex-E/II FPGA series allows the creation of partial bitstreams containing
the whole column height of CLB configuration data. One
of the most important enhancements in terms of PR capability
on the Virtex-4 series is that the CM of Virtex-4 is a Block
Frame Based configuration memory. It allows to design any
rectangular reconfigurable area, even smaller than a column.
The size of the bitstream may be greatly reduced as just
the block frames belonging to the PR area are included in
the bitstream, rather than at minimum all the column frames
like on Virtex-II. It increases the flexibility of design, and
performances of PR as well. The bandwidth of the V4 ICAP
is also 8 times greater than the ICAP bandwidth of Virtex-II
so the reconfiguration time is speeded-up. As the V4 FPGA
allows to design rectangular PR area, Xilinx provides a new
version of its Bus Macro for both horizontal and vertical signal
Fig. 2.
Fig. 3.
Macro
Two architectures to perform PR on FPGA.
Rectangular PR area on Virtex-4 with horizontal and Vertical Bus
interconnections. These new versions of Bus Macro are based
on pre-routed and pre-placed LUTs as shown in Fig.3.
B. The need of Partial Reconfiguration
In [8], we identified that a multistandard device should
perform a lot of functions. These functions could be classified
in three classes as detailed in Fig. 4. Each class of functions
is either memory-oriented, computation-intensive or needs
implementation flexibility to answer the wide range of coding
patterns. Each cluster architecture is oriented depending on the
processing requirements of the different classes of functions.
We have proposed in [10] a system architecture based on these
clusters. Moreover, each cluster is characterized by the type
Fig. 4.
Classification demands of Computing Resources.
of operation (the data size, the memory, the frequency, the
computing resources required). In fact, it is feasible to share
part of the processing resources between several standards at
the price of the use of reconfiguration. But as standards present
function commonalities, only some sub-parts of the standards
should be changed during a standard switching. So Partial
reconfiguration is an essential feature of SWR systems. But
this feature has only been reserved to processors until now:
changing a sub-part of the executable code of a DSP, for
instance as in [14]. We show hereafter how this concept of
PR may be generalized to the reconfigurable HW world.
III. MANAGEMENT OF THE PARTIAL
RECONFIGURATION
The presence of a Xilinx V-4 FPGA on the E²R II
demonstrator offers the opportunity to implement partial reconfiguration of FPGA, which is of great interest for future
SWR systems design. The PR of the Virtex-4 is involved
on the platform as follows: each processing function/module
potentially contains sub-parts which will be reconfigurable
to enhance the flexibility of the function. These dynamically
reconfigured sub-parts are mapped into reconfigurable areas
in form of PRMs (Partial Reconfigurable Module). The PR
implementation necessitates to connect to ICAP in the chosen
context of auto-reconfiguration as shown in Fig.2.2. Autoreconfiguration means that the reconfigured areas of the FPGA
(PR Regions) are managed by the FPGA itself, through an
embedded processor, such as the soft-processor MicroBlaze
provided by Xilinx. This latter could be much simpler, as a
state machine, but the MicroBlaze will also be responsible of
other managing tasks. For instance, the MicroBlaze could also
be the control interface with the NoC network, as in solution 1
of Fig. 5 exposed hereafter. In the context of E 2 R II platform,
the PR processing modules inside the FPGA are linked to
the other processing modules of the board through the NoC
network which is extended from the CEA Faust SoC [9] to
the FPGA. Several solutions can be considered to connect the
PR processing modules to the NoC network. Two of them are
suggested below to illustrate the different approaches of the
insertion of PR processing modules in the processing chain of
Fig. 5. Solution 1 for the architecture of Virtex 4 and its interconnexion
with NoC from CEA [9]
the SWR platform. Note that in both cases, the reconfiguration
link between the MicroBlaze and the processing modules is
hidden to the FPGA logic resources plane since it goes through
ICAP into the configuration plane.
A. First Solution
In solution 1 of Fig.5, the NoC [9] is connected to the
PR architecture through the MicroBlaze while the PR subparts of the system remain quite independent. The MicroBlaze
manages on the one hand the data transfers coming from
(respectively going to) the NoC towards (respectively from)
the processing modules with PR capabilities. On the other
hand, it also manages the configuration of the PR Region
inside each PR processing module. The PR Regions are
highlighted by additional red rectangles in the diagrams. The
PR region will support the reconfigurable tasks which will
allow to change the functional behaviour of the processing
module. Three configurations are drawn on this figure for PR
Region A: PRM A0, PRM A1 and PRM A2. PRM Ai are the
bitstreams corresponding to the PR Region A. This results in
three potential processing capabilities for the corresponding
processing module. They can be changed at run-time, in other
words while the rest of the FPGA still runs. Depending on
the reconfiguration duration, data should be stopped at the
entrance of a PR processing module under reconfiguration.
The reconfiguration tasks could also be chosen in an idle time
between two data buffers computation.
B. Second Solution
Main drawback of solution 1 is that the MicroBlaze risks
to be overloaded by data transfers activity. Consequently, a
second solution is proposed in Fig. 6. Solution 2 includes
the processing modules supporting PR in the NoC architecture itself, in the sense that each PR processing module is
connected independently to the NoC for data transfers. A
MicroBlaze is still necessary as one of the nodes of the NoC.
Its role is here only to manage the PR Regions bitstreams.
No specific connection is necessary between the MicroBlaze
and the dynamically reconfigurable processing modules since
Fig. 6.
Zoom on Virtex 4 architecture for solution 2.
PR is operated through the ICAP and the configuration plane
of the FPGA. This physically separates data transfers from
control transfers on two different communication means. This
is a great advantage in order to avoid contention between data
and reconfiguration transfers.
IV. EXAMPLES OF PROCESSING MODULES
A. General architecture of a PR processing module
Fig. 7.
Partially Reconfigurable Constellation mapper
to change the coefficients of the FIR. Consequences are more
important than expected at first glance. This approach avoids designing and consuming communication resources
to access several sets of coefficients. This includes address
and data buses for RAM access. ROM is directly reconfigured
through the configuration plane. Even concerning a common
functionality, many considerations can be taken into account,
and consequently derive several implementations.
Depending on the nature of the processing module, the PR
region (red rectangle) may support functionally different subparts of a module, as for instance:
• for a FIR: coefficient storage zone,
• for a FIR (another implementation): controller that manages the FIR length,
• for a convolutional coder: routing paths,
• for a constellation mapper: fully reconfigured.
This set of functions has been chosen to explore as many
situations as possible. We propose to detail each of these cases
hereafter. The general architecture of a PR processing module
includes a PR Region, which supports the PRM part of the
module, but also a fixed control part and a fixed processing
part.
B. Constellation Mapper
In a PR Region, it is firstly possible to consider the
implementation of different tasks occupying the same size in
terms of configurable elements. This is a priori the situation we
can expect when thinking of the PR of FPGA. But this is only
a specific situation from which can be derived many others.
This is illustrated by Fig. 7 in the context of a constellation
mapper. Mapping from BPSK, to 16 QAM, 64 QAM, etc.
is directly provided in the bitstreams of the PRM. This is a
module-based PR Region that needs to be connected to
other design areas through Bus Macro.
C. FIR Implementations
Fig. 8 refers to the case of a FIR whose coefficients must be
changeable. This is here the set of coefficients that is located in
the PR Region. FIR coefficients are stored in FPGA memory
blocks configured as ROM. The ROM blocks are reconfigured
Fig. 8.
FIR implementation 1 for pulse-shaping flexibility.
Fig.9 for instance takes again the example of a FIR, but with
a different goal than in previous figure: change the length of
the FIR. This includes changing the coefficients (as previous
case), but also a sub-part of the control finite state machine
that manages the number of calls of the 4-taps FIR, and a set
of registers to memorize some temporary data. These three
items are included then in the PR Region.
D. Convolutional Coder Implementation
Fig. 10 shows how this is implemented for the example
of a convolutional coder. Here, PR is used to configure a
TABLE I
BITSTREAMS SIZE AND RECONFIGURATION TIME ON A VIRTEX-II 2000
IP | Bitstream Size | Reconfiguration Time
Fully Reconf. FIR | 74.7 kB | 1.5 ms
PR FIR | 25.7 kB | 510 µs
PR Conv. coder | 2 kB | 40 µs
of the FPGA which is about 16 ms for the xc2v2000 device.
This stresses that many design choices may impact the size of
the bitstream and consequently the PR efficiency, in function
of the design effort. But this issue is out of the scope of this
paper.
Fig. 9.
FIR implementation 2 for filter length flexibility.
reconfigurable Switch Box (RCSwb) that allows changing the
polynomial generator of the coder. The aim is to obtain very
small size bitstreams for a minimum memory occupancy and
a minimum reconfiguration time.
V. CONCLUSION
We propose in this paper to take benefit of the PR in order
to enhance current SWR and future CR system capabilities.
FPGA PR permits indeed to bring run time reconfiguration to
the reconfigurable HW world. We propose a general architecture for the design of PR modules, and illustrate it for three
examples. The next step of this work is to validate this concept
within the platform of the E 2 R II project.
VI. ACKNOWLEDGMENT
This work was performed in project E 2 R II which has
received research funding from the Community’s Sixth Framework programme. This paper reflects only the authors’ views
and the Community is not liable for any use that may be made
of the information contained therein. The contributions of
colleagues from the E²R II consortium are hereby acknowledged.
REFERENCES
Fig. 10. Convolutional coder with a dynamically reconfigurable switch box.
As previously stated in the other examples, the sending of
new polynomial values does not require the use of a dedicated
communication means for configuration data transfer. This
is automatically operated through the configuration memory
plane which permits to define the FPGA run-time design.
E. Partial Reconfigurations Results
The table I gives some results and orders of magnitude in
terms of partial bitstreams size and reconfiguration duration for
the listed PR IP on a Virtex-II 2000. The results given in table
I are highly dependent on the chosen design methodology. As
shown in table I, adopting a design methodology to partially
reconfigure a sub-part of the FIR (the PR FIR) allows to
significantly save reconfiguration time (510 µs) compared
with the IP that is fully reconfigured in 1.5 ms. Note that
even the latter case is 10 times better than a full configuration
[1] J. Mitola, Cognitive Radio, Ph.D. dissertation, Royal Inst. of Tech.,
Sweden, May 2000.
[2] J.Mitola, ”The software Radio Architecture”, IEEE Communications
Magazine, May 95, pp. 26-38.
[3] Christian Roland, Jacques Palicot, ”A New Concept of Wireless Reconfigurable Receiver”, IEEE communications Magazine, july 2003,pp.
[4] K. Compton and S. Hauck, Reconfigurable computing:a survey of systems
and software. ACM Comput. Surv., vol. 34, no. 2, pp. 171-210, 2002.
[5] R. Hartenstein, A decade of reconfigurable computing: A visionary
retrospective, date, vol. 00, p. 0642, 2001.
[6] M. Cummings and S. Haruyama, FPGA in the software radio, IEEE Communications Magazine, vol. 37, no. 2, pp. 108-112, Feb 1999.
[7] J.P. Delahaye, G. Gogniat, C. Roland, P. Bomel,”Software Radio and Dynamic Reconfiguration on a DSP/FPGA Platform”, Frequenz n58(2004)56, Germany.
[8] J.P. Delahaye, J. Palicot, P. Leray, ”A Hierarchical Modeling Approach
in Software Defined Radio System Design”, IEEE Workshop on Signal
Processing Systems, SIPS 2005, Athens (Greece), November 2005.
[9] R. Rabineau, D. Lattard, Y. Durand, M. Lobeira and JP Rossi, ”Flexible
Test-Bed for B3G Systems”, IST mobile summit 2006.
[10] J.P. Delahaye, C. Moy, P. Leray, J. Palicot, ”Managing Dynamic Partial
Reconfiguration on Heterogeneous SDR Platforms”, Sdr Forum 2005,
November, Los Angeles, USA.
[11] J. Liang, R. Tessier, and D. Goeckel, A dynamically reconfigurable
power-efficient turbo decoder. in FCCM. IEEE Computer Society, 2004,
pp. 91-100.
[12] Xilinx, Early access partial reconfiguration user guide,ug208, 2006.
[13] H. Wang, J.P. Delahaye, P. Leray, J. Palicot, ”Managing dynamic
reconfiguration on MIMO Decoder” in 14th Reconfigurable Architectures
Workshop, RAW 2007, Long Beach Ca, USA.
[14] Apostolos Kountouris, Christophe Moy, ”Reconfiguration in Software
Radio Systems”, Second Karlsruhe Workshop on Software Radio, March
2002.
'-
$
"#0 -
504
-
"
, '< 0 '#0.
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
'-
504
$
"#0 -
*
-
& &
.$
,$
# #$
03
c
=d%
% )
G
c
;d , /
92
0
9
&
H< G
'-
'- "
G
H< G
)
!
A
<
1 "
%
%.
5$
#0,
5$
#0,
1$ $
!
6
9
% )
(
*
<
H
6
9
<
H
Decide
Sense
Adapt
% )
% )
) .!
4M
:"% " .
+
O
K
*L3
&5@
5
&
2
$ %+# 3
;5
%
%.
6
2
2
&'
<
,
9
9
9
( &
6
6
<
'9
9
6
6
96
G
>
( &
9
H
<
K
6
D
, /
(
6
/
c ( d$ 9
% )
9 6
6
−
>
2
%
>( 6 <
−
)<
9
<
−
"
, '< 0 '#0.
?
'-
$
"#0 -
504
-
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
smart
sub-system
analysis
stimuli
learning
user
SDR communication
sub-system
network
environment
Application layer
sensing
means
Multiple physical layers
Multiple
Multiplephysical
physicallayers
layers
electro-magnetic
environment
'9
M
7!
$
adapting
means
…
hardware
) .!
orders
decision
&
%
%.
&!
%
& .
A
96
, #
3
>
6
6
6
2
9
9
9
>6
6
2
>
>
/
6
c
=d
>
'
$
6
0$ %
"&
!
7
7
7
>
2
>
9
9
&;%
M P& # #
"&
!
&
'9
?
"$
%
%.
>
9 >
1 , - 4
6
>
,
%
>6
6
69
6
9 >
-1'
) '
2
/
,
5 %1
%
"
<
9 & N)
&
, U !01
-1 5
6
9 (
&
1
>&
*
2
+ >
6
/
! )
, '< 0 '#0.
"
%
%
&
'-
504
$
"#0 -
-
9 >
.$
,$
# #$
03
(
9
9
Level 1
L2_CRMU
reconfiguration
orders
L2_ReMU
(M
0
%& "
! .
'
reconfiguration
orders
(parameters)
"!
".
0
.
' =
(
>
2
>
, g' 5% ) ,
> <
9
>
(
>
>
+.! &
/
>
, g 5% )
6
>
9
6
(
9
metrics,
notifications
Level i
Li_CRM(U)
Level i-1
* M )%
"
towards
next
PBU
>
Level i+1
) .!
c ( d
L3_ReMU
metrics or
reconfiguration
sensing
orders
information
processing block unit
(PBU)
from
previous
PBU
'- "
network
reconfiguration orders
L1_ReM
L3_CRMU
'-
2
L1_CRM
Level 2
1 "
9
reconfiguration
orders
Level 3
) .!
2
<
network
metrics
high level
metrics or
sensing
information
1$ $
!
reconfiguration
orders
reconfiguration
orders
Li_ReM(U)
metrics,
notifications
#" & .
"
% #%
#$
".
'
, '< 0 '#0.
reconfiguration
orders
%& "
"!
.
?
'-
$
"#0 -
3
504
-
2
69
.$
,$
# #$
03
1$ $
!
1 "
9 >
>
9
6
2
9
><
9
P $
9
6
9
9
9A
/
2
% ="
9
,
)<
5,
+K
9
2
6
<
2
5,
Q
6
<
"
6
<
6
9
$
3 $
c
= d
T
A
6
>
&
&
, /
5,
;
+
:
2
>
c ( =d
<
6
9
6
&
5,
< T
+
, U !01
1< 6
<
T
,M
# &$
2
/%
%& "
"
"!
, '< 0 '#0.
6
<
>6
>
9
#
) .!
/
6
&
>
3#, 5
+
&
5 '
!
6
5,
-1'
?
'- "
-1 5
J
#
6
9
+
'-
.
".
'
'
2
'-
$
"#0 -
'
504
+
-
&
K
9
(
96
2
>6
1
9 >
1 "
6
$ 9
9
>
6
K
&
<
>6
,
I
>6
Q 9
9
(
9
(
2
/
@
: L1_CRM
: L1_ReM
Order: (None, None)
Info: Null
: L3_ReMU ISI_capture
T
>
6
96
9
)
Order: None
Info: Wait for entry
: L2_ReMU_func_fusion
: L2_CRMU_func_fusion
Order: (None, None)
Info: Null
Order: None
: L3_ReMU Equalizer
: L3_CRMU ISI_capture
Order: None
Info: transparent
Order: None
Order: None
Operator
Equalizer
Metric value: none
State: activated
Metric value (%) : 75
) .!
>6
%,=g 5 )
>6
-M
"# &
#"
$
;&
K
=
%,=g' 5 ) ,
2
6
9
9 >
<
>
2
9
0
A
>(
2
+
9
9
2
<
69
9
$"
9
$ "<
, g' 5
9
9
+
9
: L3_CRMU Equalizer
Order: None
Info: Null
Operator
ISI_capture
9
9
6
6
, ,=g' 5
A
1
6
(
6
>6
92
9
>
9
%$ ") ,
9
$ " %$
"$
96
<
'- "
&
K
+K
'-
9
2
&
1$ $
!
<
(
<
&
.$
,$
# #$
03
D
(
K < 9
6
9
9
<
2
/
1
K
"
9
>
, '< 0 '#0.
@
%
?=
'-
504
$
"#0 -
>M
6
9
K
3
.$
,$
# #$
03
6
<
9
'-
9
$"
9
(
'- "
>
6
9
+
&
6
6
<
2 K
&
&
&
>6
9
( &
,
<
K
6
2
2 $ 9
<
6
9A
>
6
>
1 "
)
L
>
1$ $
!
6
<
9
;= ,
9
>
9
<
>&6
>6
(
(
>
6
3
9
>
9
'
K
$
$ 9
9
9
<
A
6
< 6
+K
6
> 2
69
&
'
>
&
> <
6
6
&
3
D
A
A
6
A
>
>
0
5
9 >
9
0
A
>
-1'
<
<
M
>
9 >
<
92
>6
9
6
,
6
J
9
2
>&
&
6
9
+ +
A
(
>
*9
> 6
,
>&
A
9
1",
<
A
9
6
)
>
> 6
)
&
A9
,9
9
*
5$
#0,
B
A
2
6
T , %)
VV [<
c
=d
2
9
K
A
$
#
>&
'
'
'
%
?;
, U !01
6 &
c
= d
9
9
2
%$
6
'
'
:)<
) '
T
2
"
>
1
<
C '
6
, '< 0 '#0.
2
0
Z
> Z X >
+
&
"
3 C X
C
> CrownCom'06 – First International Conference on Cognitive Radio Oriented Wireless Networks and Communications<
From a Configuration Management to a
Cognitive Radio Management of SDR Systems
Loïg Godard, Christophe Moy, Jacques Palicot
Abstract—This paper proposes a functional management
architecture for Cognitive Radio systems. It relies on a
previously defined configuration management architecture for
multi-standard SDR systems, and complements it to support
cognitive radio features. This paper explains the requirements of
Cognitive Radio systems in terms of reconfiguration, smartness
and sensing capabilities. A configuration management
architecture capable of dealing with the hardware heterogeneity
and a wide range of reconfiguration scenarios expected with SDR
systems is presented. The management is distributed over the
system and a hierarchical dependency is set on 3 layers, each
having a different level of knowledge of the system and the
associated hardware constraints of the elements it supervises.
Then a cognitive management functional architecture is derived
from the previous one, copying the 3 layers of hierarchy. The
roles of the elements of each layer are discussed, as well as their
respective interactions and their relationships with the elements
of the configuration management architecture.
induced by SDR usage scenarios on the other hand.
As detailed in [5] a CR equipment should take decisions
based on information given by sensors on the one hand and on
rules and self learning capabilities on the other hand. These
considerations have direct consequences on the hierarchical
configuration management of [4]. In fact each management
level should be driven by an associated cognitive module.
That is why we propose, in this paper, a distributed
hierarchical cognitive reconfiguration management for SDR
systems.
The remainder of the paper is organized as follows. In
section II, we describe properties of Cognitive Radio systems.
Then we deal in section III with the hierarchical configuration
management architecture. In section IV, we describe our
proposal for a hierarchical cognitive management mapped on
the previous configuration management architecture. It is
particularly stressed how these two hierarchical approaches
are inter-related. Finally, in section V we conclude this work.
Index Terms—cognitive radio, cognitive management,
configuration management, multi-standard terminal
II. COGNITIVE RADIO SYSTEMS
T
I. INTRODUCTION
HE number of baseband processing contexts of a
Software-Defined Radio (SDR) system [1][2] rises
increasingly as the number of standards to handle in a single
handset increases. Consequently, configuration management
becomes a mandatory part of a reconfigurable system [3]. But
most of the proposals in reconfiguration management didn’t
consider both aspects of the reconfiguration: hardware
reconfiguration capabilities, and reconfiguration schemes of
application. It is necessary though to adapt reconfiguration
mechanisms capabilities of hardware resources to the
switching needs of the applications. This reconfiguration
adequacy is essential to reduce reconfiguration overhead. We
proposed in [4] a first step of such a global approach of
reconfiguration management of multi-standard SDR systems.
We concluded that reconfiguration management should be
distributed and hierarchical, in order to support both HW
heterogeneity and its associated reconfiguration capabilities
on the one hand, as well as the reconfiguration schemes
Manuscript received February 28, 2006.
L. Godard, C. Moy and J. Palicot are with IETR, UMR CNRS 6164,
Automatic and Communication lab / Supélec SCEE team, Rennes, France
(+33 2 99 84 45 00; fax: +33 2 99 84 45 99; e-mail: [email protected],
[email protected], [email protected]).
A. A reconfigurable radio with sensing means
The Cognitive Radio concept, here tackled in its widest and
most unrestricted sense, refers to a communication system that
is able to observe its environment, analyze it, and react to it in
some way, as illustrated in Fig. 1.
A CR system is a smart SDR system. The target of multi-standard radio is of particular interest for this approach. In
addition to its multiple communication parts, the CR system
indeed features sensing means, adapting means and a smart
sub-system dedicated to analyzing stimuli and making
decisions [6].
Sensing means refer to all the possible methods the CR
system has at its disposal for observing its environment, which
can be categorized in four main families described below:
electromagnetic environment: spectrum occupancy,
Signal to Noise Ratio (SNR), multi-path propagation,
etc.
hardware environment: battery level, power
consumption, computational resources load, etc.
network environment: telecommunication standards
(GSM, UMTS, WiFi, etc.), operators and services
available in the vicinity, traffic load on a link, etc.
user-related environment: position, speed, time of
day, user preferences, user profile (access rights,
contract…), video and audio sensor (presence
detection, voice recognition), etc.
Ele
Ha
ctr
rd
om
wa
re
ag
net
ic
Sensors
Smart sub-system
Adapting means
SNR
Spectrum occupancy
propagation
Memory
capacity
im
u
li
rd
O
Analysis
Power
consumption
Services
Us
Ne
ertw
rel
ork
ate
d
St
er
Decision
Learning
Standards
Standard
choice
Audio / video
alert
Beamforming
Decoder
choice
positioning
Video
Speed, position
profile
Audio
Fig. 1 - Schematic functional view of a Cognitive Radio system
As shown in Fig. 1, all the stimuli received by the sensors
from the different layers (from physical to application)
provide metrics that must be merged and analyzed jointly in
order to reach the best possible reconfiguration decision.
Reconfiguration is supported by a software radio or at least a
SDR architecture, which integrates both adequate HW
capabilities and SW architecture. Our previous research led
to the definition of a hierarchical configuration management
[4], whose major features are recalled in part III.
B. Smartness
To make a reconfigurable radio become a CR, it is
necessary to add some smartness in order to make it self-aware [5]. We assert that this smartness itself also needs to be
distributed all over the radio system. It may be foreseen
indeed that a CR will have to face high-level decisions as well
as local decisions. High-level decisions are to be understood
in the sense that network considerations or constraints on the
overall CR system may take benefit from reconfiguration.
This is what is considered in End-to-End Reconfigurability
[7]. Local decisions may concern for instance the
reconfiguration of a single digital processing block unit (PBU)
in order to adapt its behavior to a specific context (increase or
decrease the number of fingers of a RAKE receiver
depending on the number of paths of the propagation channel).
But high-level decisions should moreover impact the low-level layers of the system through the low-level decision
entities, which leads to the notion of a hierarchical repartition
of the smartness through the CR system.
In a CR system, it is consequently straightforward to merge
the cognitive management we consider here, with the
previously defined configuration management of part III, due
to their identical features in terms of distribution and
hierarchy. This paper is all about describing the combination
and the relative interactions between them.
III. RECONFIGURATION MANAGEMENT ARCHITECTURE
The great diversity of processing types in a multi-standard
application implies a large number of processing
configurations to be managed. The configuration management
is complex and we believe that a hierarchical approach of
configuration control and management could simplify it [4].
The benefits of clustering the transmission chain have been
presented in [8], particularly to manage partial reconfiguration
of an application. In addition to this, the proposal of a
hierarchical view enables to manage multi-granularity of
configurations, which is of particular interest for
heterogeneous architectures. The proposed model is composed
of 3 levels of hierarchy detailed in Fig. 2. A system
architecture compliant to this functional model includes one
Configuration Manager at level 1 (L1_CM), several
Configuration Manager Units at level 2 (L2_CMU), each of
them being responsible for one or several Configuration
Manager Units of level 3 (L3_CMU), which directly manage
the processing components.
Level 1
Config. Manager
L1_CM
Standard Set
Std 1
_____
_____
_____
Std 2
_____
_____
_____
Std 3
_____
_____
_____
Standards Parameters
Level 2
L2_CMU
Function Set
… Function Set … Function Set
L2_CMU
L2_CMU
Functions
Library
Level 3
L3_CMU
Block Set
L3_CMU
… L3_CMU
Block Set … Block Set
HW/SW
Blocks
Library
Configuration Management
Fig. 2 – Hierarchical distributed configuration management functional
architecture
A. L1_CM
There is one L1_CM in the system. L1_CM is the
orchestra's chief. It has the global knowledge of the state of
the system thanks to databases. It can update these databases
with the information it receives from the network on the one
hand, and from the system itself on the other hand. In the
latter case, reconfiguration procedure comes up to L1_CM as
soon as several L2_CMU are concerned. The L1_CM watches
over the coherency and the synchronization of the orders
between the involved L2_CMU.
L1_CM gives abstract orders of reconfiguration to the
underlying L2_CMU when this is needed, as for a standard
hand-over for instance. "Abstract orders" here is to be
understood as orders that are completely independent from the
nature of the HW processing devices they will impact. The
only concern of L1_CM is to know where are the L2_CMU
(on which HW devices) and how to reach them (through
which method and communication media).
B. L2_CMU
The generic functions selected in level 1 through abstract
orders are specialized at level 2 in accordance with standards
specifications. The set of attributes of each function is handled
by the L2_CMU to create each functional context of the entire
processing chain. For example, a generic function "bit to
symbol mapping" is set at level 2 to fit a selected standard. In
the case of 802.11g standard this setting could be BPSK,
QPSK, 16-QAM, 64-QAM constellation. At this level, in
accordance to performance requirements of the function, the
execution target is chosen. But the L2_CMU does not keep in
charge the effective instantiation of the context onto the
hardware. Here, functions are handled through a few attributes
to completely characterize them.
C. L3_CMU
The processing datapath architecture supporting the PBUs
depends on the reconfigurable computing resources of the
hardware architecture. The complete processing path could be
formed by different types of reconfigurable resources such as
digital signal processors, FPGA, and inside FPGA:
configurable accelerators, array of DSP blocks or a fine grain
reconfigurable datapath. The main task of the L3_CMU in the
configuration path is to find available processing resources
and configure them to enable the execution of the functional
context created at the superior level. Clock management of the
resources is also included as an important feature of the
configuration manager to reduce power consumption and to
provide an efficient management of the multi-rate applications.
IV. COGNITIVE RADIO ARCHITECTURE
The Cognitive Radio architecture is an extension of the
configuration management architecture towards a Cognitive
Radio global management.
A. Distributed Hierarchical CR architecture
The distributed hierarchical management of part III enables
to reconfigure the system with adequate features in answer to
the constraints imposed by the scenarios of use foreseen for
such systems [4]. But it supposed that the reconfiguration
orders only come from outside the system. There is a need to
include smartness in the system itself. This is all about CR. As
stated in II.B, this smartness has to be distributed inside the
CR system, as well as the configuration management.
Consequently, an identical approach has been used for the CR
architecture. Fig. 3 shows the direct correspondence set
between the reconfiguration management and the cognitive
management:
• L1_CM → L1_CR (level 1 – Cognitive Radio)
• L2_CMU → L2_CRU (level 2 – CR Units)
• L3_CMU → L3_CRU (level 3 – CR Units)
The CR architecture should be capable of taking decisions
at any level of the system, under a global supervision. It also
should be aware of the reconfiguration capabilities of all
its different elements at both HW and SW levels. The
advantage of this hierarchical structure is to permit to take into
consideration many parameters of different kind at any level
of the system on the one hand, and to impact the system in
terms of reconfiguration either locally or at a large scale on
the other hand.
Level 1
Cognitive Manager
L1_CR
Config. Manager
L1_CM
Standard Set
Level 2
L2_CRU
L2_CMU
Function Set
…
L2_CRU
L2_CMU
Function Set
…
Level 3
L3_CRU
Cognitive Management
L3_CMU
Block Set
…
L3_CRU
L3_CMU
Block Set
…
Configuration Management
Fig. 3 – Hierarchical distributed Cognitive Radio management functional
architecture
This hierarchical distributed architecture enables a wide
range of possible scenarios. From global Quality-of-Service
(QoS) considerations at the system level, to very local changes
for bug-fixing or optimization purposes for instance. Let us
consider the example of a large scale scenario: battery life
extension. The distributed smartness reports to the L1_CR,
through each L3_CRU, the status of power consumption of
each of the PBUs of the system. Combining this information
with others (concerning the QoS and the battery life duration)
obtained through other elements of the CR architecture, the
L1_CR takes appropriate decisions in terms of
reconfiguration, in order to increase battery life duration for
instance, while maintaining a targeted QoS. L1_CR gives this
order to L1_CM that makes the necessary repercussion
through the configuration management architecture of part III.
Smartness may vary from very simple reactions such as a
threshold detection and associated response (referenced in a
pre-defined table) up to very complex scenarios. The
complexity depends on the number of metrics to take into
consideration and to fuse, as well as the decision engine
implemented. The following paragraphs detail the behavior of
the elements of the cognitive architecture that make these
scenarios feasible. We opt in the following for a bottom-up
description.
B. L3_CRU
L3_CRU role is firstly to capture sensing information and
translate it into metrics. Metrics can either be computed by the
L3_CRU itself or directly be provided by the PBU it is
associated to. In this last context, the L3_CRU only receives
the result from the PBU. Otherwise the L3_CRU receives the
necessary information from the PBU (if not data themselves)
which permits to obtain the metrics. L3_CRU operation may
vary from a very simple data capture to a very complex signal
processing procedure. The implementation complexity of the
L3_CRU is consequently very variable.
Thanks to the distribution of the smartness through the
entire CR system, the L3_CRU may react in two different
ways, which permits to deal with any situation the CR system
has to face, as illustrated in Fig. 4.
Level 2
reconfiguration
orders
metrics,
notifications
Level 3
L3_CRU
metrics or
sensing
information
reconfiguration
orders
(parameters)
L3_CMU
reconfiguration
orders
processing block unit (PBU)
Fig. 4 – L3_CRU and L3_CMU message exchanges to manage a PBU
On the one hand, the sensing information captured by a
L3_CRU is known to have impacts at the only level of the
PBU it manages. It is no use to propagate the information
through the entire system. L3_CRU acts locally, takes
reconfiguration decision, interprets the metrics and converts
them into parameters it sends to L3_CMU, that makes the
reconfiguration as in III.C. L3_CRU notifies to higher layers
of the CR structure the changes that were done. Additionally,
it is the role of the L3_CRU to verify the PBU behavior and
inform higher CR layers of its status (error, stand-by, run,
etc.). If the impact is larger, on the other hand, L3_CRU sends
the metrics to L2_CRU. If all the PBUs concerned by the
reconfiguration are under the same L2_CRU responsibility,
L2_CRU plays the roles of L3_CRU in this specific context
(takes reconfiguration decision and notifies to L1_CR).
Paragraph IV.C explains in detail how L2_CRU acts.
Note that the impact of a sensing information is a priori
known. It is clear that depending on its nature, each stimulus
has one or several well-identified causes and consequences
(maybe combined with other stimuli). So that the
reconfiguration procedures that could be provoked, as well as
their range in the system are pre-defined. Another important
point to stress is that the information sent from the lower CR
layers to the higher layers has to be saved in databases that
contain the state of the system. Most of them will be located at
the L1_CR level, but it can be considered to have databases
accessible at L2_CRU level.
As previously mentioned, the L3_CRU must be able to take
decisions for a local impact of the sensing information. As
stated for the metrics computation, the decision procedure
may range from very complex to very simple processing. The
following examples may be given, but the list is not
exhaustive, in increment of complexity:
- a simple threshold,
- a state machine,
- neural networks,
- optimization heuristics (genetic, etc.).
This range of decision procedures is also met at the higher
levels L2_CRU and L1_CR. The higher the level, the more
complex algorithms may be expected. The reason is that more
metrics are to be taken into account and combined, but also
the processing devices are more suited to that kind of
processing. Note that training techniques may also be
implemented.
C. L2_CRU
L2_CRU gathers the metrics coming from the L3_CRU it
supervises, as described in IV.B. Two situations may occur.
Either it transmits the metrics to the L1_CR if these metrics
may contribute to a decision of higher level than those it can
take by itself. It may by the way update L2_databases if
necessary. And/or it converts the metrics coming from the
bottom in abstract reconfiguration orders (abstract in the sense
that not HW-dependent) for a delivery to the L2_CMU.
L2_CRU notifies the L1_CR of the reconfiguration it ordered.
Then L2_CRU supervises the correctness of the L2_CMU
reconfiguration orders thanks to all the L3_CRU reporting the
behavior of the PBUs that depend on this L2_CRU/L2_CMU
couple.
D. L1_CR
L1_CR is the supervisor of the cognitive sub-part of the
system. It is the intelligence that can take initiatives depending
on the environment stimuli. These stimuli are translated into
metrics. They can come from the bottom hierarchy of the CR
system (L2_CRU and L3_CRU), namely from the processing
part of all layers of the OSI model. The PBU composing the
processing datapath may indeed process physical layer, as
well as protocol, or even application layer operations. But this
is also the L1_CR role to receive the metrics from the network
side. There is a heavy debate on the repartition of the
smartness between the network and the access point or
terminal CR system. Our proposal can cope with all possible
repartition of the smartness. Either L1_CR processes itself raw
data and extracts the corresponding reconfiguration
information to be delivered to the L1_CM, or it directly
transmits to the L1_CM the pre-processed information it
receives. Nevertheless, user-related information is under the
responsibility of L1_CR, as well as other stimuli, that are not
directly extracted from the signal processing of the radio
protocol stack: battery level, time-of-the-day, air temperature,
etc.
The actions taken by the L1_CR can be of very different
range and complexity, depending on the nature of the system
(terminal, PDA or even bigger), and its degree of smartness.
L1_CR is in charge of the multi-standard reconfiguration, and
of any reconfiguration whose range concerns several
L2_CMU. It also supervises the correctness of the
reconfiguration it initiated. Such large scale reconfigurations
(at the standard level) must respect extremely hard constraints
in terms of synchronization between the PBU, time of
reconfiguration, etc. It still seems an impossible challenge to
really perform a real inter-standard soft-handover. But if it
will be possible one day, a complete scenario must be played
in the L1_CR, going through steps with the two standards
running at the same time during a small portion of time at
least. So that the two standards could catch the application at
the very same time in order to maintain a continuity of the
service.
The decision at L1_CR may be triggered by various kinds of
events, depending on the scenarios. Not all of them can be anticipated.
At least arrays of different operating mode (very good, good,
energy saving mode, etc.) must be considered, so that the
system can reconfigure itself according to its environment.
The trade-off is to be done between the two following
extremes. Either be very directive with all the supported
scenarios in pre-defined databases. It is possible to consider
they may be updated. Main advantage is that each scenario
may have been pre-tested. Or a certain autonomy is let to the
system. The degree of supervision of the user or the
manufacturer/network/operator has to be defined.
E. Li_CR(U) and Li_CM(U) interactions
Fig. 5 aims at generalizing
the interactions described
separately for the configuration management and the cognitive
management in the previous sections. The Li_CM(U) may be
activated either from the superior level(s) of the configuration
management, or from the Li_CR(U) of the same level.
Li_CM(U) manages reconfiguration and propagates
reconfiguration orders down to the lower level, in order to do
the reconfiguration of the PBUs.
Level i+1
metrics,
notifications
Level i
Li_CR(U)
Level i-1
metrics,
notifications
reconfiguration
orders
reconfiguration
orders
either to the superior level Li+1_CR(U) or to the Li_CM(U)
depending on pre-defined rules, that may be static or computed
on-the-fly. The Li_CR(U) also sends notifications (of behavior
accuracy for instance) to the higher level of CR management.
So, we observe a bottom-up metrics and notifications flow in
the cognitive management and a top-down configuration flow
in the configuration management.
V. CONCLUSION
The functional Cognitive Radio architecture proposed in
this paper relies on the configuration management architecture
we already defined for SDR systems, and which is currently
under implementation. This prototyping at the configuration
management level strongly orientates our investigations
towards cognitive management, which is a guarantee of the
viability of our solution for the implementation of the CR
concepts. The originality of this proposal is due to the
effective separation of the configuration and the cognitive
management of a SDR system, while keeping a strong
relationship between them. Respective roles and duties are
well identified, as well as their interactions and interfaces with
the other entities.
Future papers will more precisely detail the intimacy of the
architecture, and illustrate its mechanisms through examples
of cognitive radio scenarios. Many points still remain to be
investigated or at least better defined. Let us just stress the
repartition and the content of the configuration and knowledge
databases. Configuration databases are the repository of the
necessary pieces of binary code (for processors) or bitstream
(for FPGA) for the instantiation of the standards supported by
the system. Knowledge databases contain the metrics from
which the cognitive management takes its decision. The
management of the decisions itself is a wide area we plan to
address in the future.
REFERENCES
[1]
[2]
[3]
[4]
[5]
Li_CM(U)
reconfiguration
orders
Fig. 5 – Generalized view of the interactions between the CR and CM
management.
The Li_CR(U) receives or computes metrics extracted from
stimuli of different kinds or from the inferior level of
cognitive management, and decides to send the information
[6]
[7]
[8]
Joe Mitola, "The software Radio Architecture", IEEE Communications
Walter Tuttlebee, "Evolution of radio systems into the 21st century",
Proc. IEE Int. Conf. on 'Radio receivers and associated systems', 1995.
Apostolos Kountouris, Christophe Moy, "Reconfiguration in Software
Radio Systems", 2nd Karlsruhe Workshop on Software Radios, Mar.
2002.
Jean-Philippe Delahaye, Jacques Palicot and Pierre Leray, "A
Hierarchical Modeling Approach in Software Defined Radio System
Design", IEEE SIPS, Athens, Greece, 2-4 Nov. 2005
Joe Mitola, "Cognitive Radio – An Integrated Agent Architecture for
Software Defined Radio", thesis dissertation, 8 May 2000.
Christophe Moy, Alexis Bisiaux, Stéphane Paquelet, "An Ultra-Wide
Band Umbilical Cord for Cognitive Radio Systems", PIMRC'05, Berlin
Germany, September 2005.
Markus Dillinger, Nancy Alonistioti, Nikolas Olaziregi, Thomas
Wiebke, "Network Functions Supporting Reconfiguration in B3G
Environments", Frequenz 58, May 2004, ISSN 0016-1136, pp. 121-125.
Jean-Philippe Delahaye, Christophe Moy, Pierre Leray, Jacques Palicot,
"Managing Dynamic Partial Reconfiguration on Heterogeneous SDR
Platforms", SDR Forum Technical Conference, Anaheim, USA, 15-17
Nov. 2005
+0
# "
0
5 $
0 (1#/#"("#
/ +<
+" 1
! ( 3# #)
3
+0
+0
A simulator for the design of the management
architecture of cognitive radio equipments
Godard Loïg, Moy Christophe, Palicot Jacques
SUPELEC / IETR
Avenue de la Boulaie CS 47601
F-35576 Cesson-Sévigné Cedex France
Email : {loig.godard,christophe.moy,jacques.palicot}@supelec.fr
Abstract— This paper presents a simulator for the design of
the management architecture of cognitive radio (CR) equipments. It explains the requirements for CR systems in terms
of reconfiguration, smartness and sensing capabilities. A key
point in CR systems is the reusability and the reconfiguration
possibilities. Thus classical approaches, which refer to create
specialized material to assume communication process, can not be
used. Indeed, CR systems have to be deployed on a heterogeneous
execution platform to be flexible and adaptable. We propose
a simulator based on a Hierarchical and Distributed Cognitive
Radio Architecture Management (HDCRAM) meta-model to play
CR scenarios. The simulator particularly helps defining precisely
the needs in terms of CR management architecture.
Index Terms—Cognitive Radio, multi standard, heterogeneous processing, cognitive management
I. INTRODUCTION
The growing number of communication standards inside
radio equipments leads to a query : how to design multiple
radio communication chains without taking too much silicon
space ? An answer is the use of Software Defined Radio
(SDR) techniques that give the opportunity to reconfigure
a part or the totality of resources of the equipment [1].
Beyond this approach, Cognitive Radio (CR) aims at giving
the opportunity to a communication system to be aware of its
environment [2]. This smartness opens new possibilities for
radio equipments : maintaining a good quality of service (QoS)
with bad communication conditions, being able to handle new
standards or new functions on demand and so on. It derives
that future radio equipments have to be reconfigurable and
aware of their environment. This possibility means to be able
to sense their environment [3], interpret metrics, take decisions
of reconfiguration in order to proceed some optimization,
reconfigure the system and finally check the reconfiguration
result. This is an infinite loop of reconfiguration in the sense
that there are always new metrics coming from internal or
external sensors that stimulate new reconfiguration operations.
Our research work is focused on how a radio equipment
can manage the reconfiguration after taking decisions in
a heterogeneous computing context. We propose that for
such systems composed of GPP, DSP, FPGA and ASIC, a
hierarchical and distributed management for reconfiguration
[5] and cognition [6] has to be deployed. This approach gives
the opportunity to tackle fine granularity reconfigurations,
as well as larger scale of reconfigurations according to the
needs. This approach also underlines the fact that decisions
of reconfiguration can be taken locally or globally in order
to decrease the time response of the system in terms of
optimization needs.
In a first part we introduce the Hierarchical and Distributed
Cognitive Radio Architecture Management (HDCRAM) we
propose for CR equipments (such as terminals and base
stations). Then we develop an example of a CR scenario
where metrics are evolving, provoking some reconfigurations
in the equipment and explain how our architecture can cope
with it. The scenario is played by a simulator derived from a
metamodel of the HDCRAM and which addresses :
. Sensing means and metrics generation,
. Metrics analysis,
. Decision taking,
. Reconfiguration management.
Simulation runs make it possible to precisely define and derive
the actions and communication means required within the global
CR management architecture of a CR equipment. Finally we
conclude on the perspectives of our work on this simulator for
cognitive radio equipment.
II. THE HDCRAM
A. A CR Architecture Based On A Reconfiguration Architecture For SDR
We derived from previous studies that a hierarchical and
distributed SW architecture is required for the reconfiguration
management of SDR equipments [4] [6] [7]. This helps
supporting reconfiguration of a wide range of granularity, on
heterogeneous HW computing resources (GPP, DSP, FPGA)
with hard real-time constraints. It is composed of 3 levels
of hierarchy above the flexible signal processing elements
operating the radio modulation/demodulation.
Then the SW reconfiguration management architecture for
CR equipments has been extended. This consists in associating
cognitive units to the reconfiguration units at each level of
the management hierarchy. We called it HDCRAM, which is
illustrated in Figure 1.
view of the architecture that has to be deployed in each
cognitive radio system accordingly to its functionality. At
Fig. 1. Hierarchical and Distributed Cognitive Radio Architecture Management (HDCRAM)
B. A Hierarchy Description
The architecture is based on 3 levels, each of them having both a reconfiguration management unit Li CM(U) and
a cognitive management unit Li CR(U). There is only one
L1 CR and one L1 CM. Several couples made of one L2 CRU
and one L2 CMU are respectiveley depending on L1 CR
and L1 CM. Each couple made of one L2 CRU and one
L2 CMU are respectively managing several couples made
of one L3 CRU and one L3 CMU. This architecture uses a
specific control flow as shown in Figure 2. The configuration
management part is characterised by top-down communication
exchanges whereas at the opposite, the cognitive management
uses bottom-up communications. Note also a control flow from
a CR(U) to its associated CM(U) for reconfiguration orders
sending.
Fig. 2.
Control flow in HDCRAM
Fig. 3.
the top level is the general management of the equipment
with the 2 entities L1 CM and L1 CR. The L1 CR manages
the cognitive radio equipment behavior which includes the
sensing and decision side of the management architecture,
which can initiate reconfigurations of the radio equipment
processing units, taken in charge by the L1 CM. At L1 level,
the HW considerations of the signal processing elements
implementations are abstracted.
At the bottom, each signal processing element of the radio
chain is associated with its own L3 CMU and L3 CRU for
very fine reconfiguration and cognitive purposes. The design
of each group made of a signal processing element, a L3 CRU
and a L3 CMU is very dependent of the HW implementation
imposed by the electronic device supporting the processing
operation. Intermediate level L2 is both an abstraction level
between top level L1 and bottom level L3, as well as a local
manager for the L3 units it is responsible of.
III. AN EXECUTABLE META-MODEL
C. A Meta-model For A CR Management Architecture
In order to open this achievement to the community, the
HDCRAM of Figure 1 has been modeled in UML (Unified
Modeling Language). The HDCRAM meta-model of Figure
3 underlines the dependencies, the vertical and horizontal
connections as well as the cardinality of each class in the
HDCRAM system. This meta-model represents a factorized
Meta-model of the HDCRAM
A. Kermeta
The main advantage of the definition of HDCRAM in
an object-oriented meta-language is to offer modularity and
reusability. Classical object-oriented meta-languages such as
MOF (Meta-Object Facility) [8] or EMOF (Essential MetaObject Facilities) offer a high level of abstraction of an
application as well as facilities to specify Domain-Specific
Language (DSL). However, these meta-languages only support structural specifications and do not support operational
semantic. Kermeta [9] language was developed at the INRIA
to support behavior definition and provide an action language
to specify the body of operations in meta-models. With the
help of Kermeta language we can describe our architecture
with both structural and behavioral definition. This offers the
opportunity to refine the meta-model with scenarios that ensure
the system conforms to specifications.
B. Developing Environment
As Kermeta is fully integrated to EMF [10] (Eclipse Modeling Framework), it offers outline view to navigate through
the whole model and meta-model, code autocompletion and
various import/export transformations (ecore2kermeta, kermeta2ecore, kermeta2xmi - xmi version of your kermeta
meta-model -, xmi2kermeta, xmi2ecore). These import/export
transformation facilities offer to the designer the possibility
to describe an application in a Topcased graphical view (class
diagram as shown in Figure 3) and export it in an Ecore format.
This format is the textual version of the graphical view as
illustrated in Figure 4.
Fig. 4.
Meta-model Ecore view
Then the designer can specify his own DSL in Kermeta code
to define the operational semantic of his meta-model and
execute simulations (thanks to ecore2kermeta transformation).
As the interoperability of Kermeta with EMF/Eclipse tools is
well performed, the designer can also deploy a model of his
architecture in a recursive editor, set the property of each class
in a pre-defined state and save it (in a xmi format) or launch
the application. A simple load action configures the system in
a previously saved state as shown in Figure 5.
IV. A HDCRAM SIMULATOR
Kermeta executable language is not used here to perform
heavy computational and complex cognitive algorithms but to
precisely detail the intimacy of the architecture, and illustrate
its mechanisms through examples of cognitive radio scenarios.
A solution to clearly specify the reconfiguration and cognition
capabilities of the HDCRAM is to express them in a language
that matches CR domain constraints. Thus, a simulator has
been developed, based on an executable meta-language that
can handle CR domain constraints (such as awareness, spectrum
access, decision making, reconfiguration achievement ).
Fig. 5.
Operation load in Kermeta code
This simulator offers the opportunity to the designer, in a
first step, to specify the CR system in a predefined state
with parameters such as the standard in use, QoS, hardware
repartition of operators on the execution platform, etc. In a
second step, the designer can modify operators metrics via a
shell command and validate interpretation and decision taken
according to those changes. Execution steps clearly underline
operations that have to be supported by both CR and CM
part, at each level of abstraction of the architecture. A key
point in CR levels is how metrics are captured, analysed and
how decisions of reconfiguration are taken.
Those operations are underlined in a cognitive cycle applied
at each level of the cognitive management. This cognitive
cycle derived from J. Mitola cognition cycle [11], benefits
from the proposed hierarchical and distributed architecture.
Indeed, distribution gives the opportunity to tackle multigranularity reconfiguration, decreasing architecture response
time and offer the possibility to reconfigure only the necessary
part of the architecture. Note that decision procedure may
range from a very complex to a very simple processing. The
following examples may be given, but the list is not exhaustive,
in increment of complexity :
• Simple threshold,
• State machine,
• Neural networks,
• Optimization heuristics (genetic, etc.).
The higher the level is, the more complex algorithms may be
expected. The reason is that more metrics have to be taken into
account and combined. We present in Figure 6, the operations
used by each CR(U) deployed in a system.
Operations are defined as follows :
• Evaluate metric : evaluate the impact of the metric on
the level.
• Analyze metric : identify the metric
The result of those operations gives a performance appreciation from which CR(U) can define actions :
• Define action : in a first step, the designer sets a list of
actions. But in the future, this part can benefit from self
learning capabilities.
Fig. 6.
Cognitive cycle
• Prioritize action : according to the impact (in reconfiguration terms) of metrics on the system, a priority is given
to the action.
• Define Metric Strategy Action : This operation can be a
simple metric sending (in an appropriate format) to the
upper layer, or an order of reconfiguration sending to the
corresponding CM(U) followed by a notification on the
new configuration to upper layer.
Once a CR(U) has taken a decision of reconfiguration, it
sends new parameters to its associated CM(U) that is responsible of its application. Reconfiguration management must
respect hard constraints in signal processing, reconfiguration
time and free resources management. CM(U) levels also have
to deal with priority and must provide a task scheduler in
order to achieve reconfiguration. This is exactly the kind of
studies the simulator will help to investigate in future work.
Section V will give an overview of a scenario played with
our simulator.
V. A SCENARIO
For clarity purpose, the chosen scenario presents a local
decision of reconfiguration. Local is to be taken here in the
sense that there is no decision coming from the network.
The equipment analyses its own performance and decides to
reconfigure itself.
The scenario underlines metrics capture, interpretation and
decision of reconfiguration with effective reconfiguration.
Figure 7 illustrates the deployed system in a tree view.
We can recognize one L1 CM/L1 CR couple, only one
L2 CMU/L2 CRU couple in this trivial example, as well as
two couples of L3 CMU/L3 CRU supervising two operators :
an ISI-capture (Inter Symbol Interference) and an equalizer
operator. The role of an equalizer is to correct the disturbances
due to ISI. An ISI metric generated by the ISI-capture operator
is acquired by its L3 CRU which transmits it after interpretation, to its L2 CRU that orders (or not) a reconfiguration of
the equalizer operator (activate or bypass it). In this scenario,
there is no need for L3 CRUs to order a reconfiguration to their
own operator. The equalizer for instance can not provoke its
Fig. 7.
Recursive editor and class properties view
own reconfiguration from a metric it is generating itself. It is
the duty of the L3 CMU controlling the operator equalizer to
check operator behavior via acknowledgement signals.
Reconfiguration orders are sent when the metric interpreted
by the L3 CRU controlling the ISI-capture operator varies from
good to bad and vice versa. This simple scenario has been
chosen to facilitate understanding but more complex scenarios
can be played. From this, L2 CRU interprets this metric in
terms of radio signal quality. If incoming metric value is
good then the signal ”performance” criteria (according to the
meta-model attribute) is good and consequently L2 CRU’s
performance is good. If metric value is bad then signal is
disturbed and L2 CRU performance switches to ”disturb”
state. According to its performance L2 CRU decides to send
the reconfiguration order (corresponding to this situation) to
the equalizer operator via its L2 CMU. In the current status of
our work, to each situation corresponds a pre-defined action.
This choice has been done in order to keep system integrity,
nevertheless this may change in the future with the use of more
sophisticated algorithms, having self learning capabilities for
instance. Reconfiguration may range from deleting operator if
there is a resource shortage or a low battery alert, bypassing it
if ISI metric is good, or activating it if signal is degraded. If the
equalizer is not deleted, the L3 CMU controlling the equalizer
achieves the reconfiguration by selecting the appropriate file
in its data-bases, and implements it on the platform if this is
a new implementation or change data path to bypass it, if not.
When a scenario is played, the designer enters new parameters coming from operators, in a shell (Figure 8) and validates
the process with the returned information.
Fig. 8.
Command shell
As previously indicated, the designer can also validate a
scenario with the recursive editor of bottom Figure 9 that gives
an overview of the current state of the selected object.
simulator is based on a DSL that underlines both HW and
SW constraints for the Cognitive Radio domain. The aim
of the simulator is to permit to designers to include their
own operators, thus increasing system’s operators data-base,
defining (not exhaustive) :
. Standard(s) and function(s) dependencies,
. Metric nature and impact,
. Possible HW target,
. Reconfiguration management.
Once defining operators and management rules, the designer
can use them and validate the equipment behavior. Considering
the current development of the simulator, distribution of both
CR(U) and CM(U) managers through the design is let at the
discretion of the designer. Nevertheless, it could be interesting
to develop mathematical tools that deploy the HDCRAM
through the design optimally.
Although this paper also presents how decisions of reconfiguration are taken and how they are done in HDCRAM, this
reveals the following issue. How to guarantee indeed a stable
and effective global reconfiguration of the equipment if each
CR(U) can decide on a reconfiguration on its own ? Isn’t the
best the enemy of the good ? These considerations have to be
taken into account in both network and equipment point of
view and will be addressed in forthcoming studies.
Another open issue is to consider real time constraints
at both decision and reconfiguration levels. Concerning the
reconfiguration side on the one hand, we have studied and
experimented many dynamic reconfiguration contexts which
make us aware and confident about the limitations to be
expected. On the other hand, we plan to address the decision
side of the issue in our future research programs.
REFERENCES
Fig. 9.
L2 CRU state after a run
As L1 CR is at the top of the architecture, it has to be
informed of the performance of each function in use. Thus,
it interprets metrics from functions and obtains a global
performance of the standard operating in the equipment. From
this, it can decide (or not) a reconfiguration action that can
range from reparameterize one to all functions or even decide
a standard change. As we are not considering a complete
transmission chain in this particular scenario, L1 CR only
receives interpreted metrics from L2 CRU and stores them.
VI. CONCLUSION
This paper has presented a simulator for the design of the
management architecture of cognitive radio equipments. This
[1] Joe Mitola, ”The software Radio Architecture”, IEEE Communications
[2] Joe Mitola, ”Cognitive Radio An Integrated Agent Architecture for
Software Defined Radio”, thesis dissertation, 8 May 2000.
[3] Christophe Moy, Alexis Bisiaux, Stéphane Paquelet, ”An Ultra-Wide
Band Umbilical Cord for Cognitive Radio Systems”, PIMRC’05, Berlin
Germany, September 2005.
[4] Jean-Philippe Delahaye, Jacques Palicot and Pierre Leray, ”A Hierarchical Modeling Approach in Software Defined Radio System Design”,
IEEE SIPS, Athens, Greece, 2-4 Nov. 2005
[5] Loïg Godard, Christophe Moy, Jacques Palicot, ”From a Configuration
Management to a Cognitive Radio Management of SDR Systems”, in
Proc. of CrownCom’06, Mykonos, Greece, 8-10 June 2006.
[6] Jean-Philippe Delahaye, Christophe Moy, Pierre Leray, Jacques Palicot,
”Managing Dynamic Partial Reconfiguration on Heterogeneous SDR
Platforms”, SDR Forum Technical Conference, Anaheim, USA, 15-17
Nov. 2005
[7] Apostolos Kountouris, Christophe Moy, ”Reconfiguration in Software
Radio Systems”, 2nd Karlsruhe Workshop on Software Radios, Mar.
2002.
[8] OMG. MOF 2.0 Core Final Adopted Specification, Object Management
Group, http://www.omg.org/cgi-bin/doc?ptc/03-10-04, 2004.
[9] Zaid Altahat, Tzilla Elrad, Didier Vojtisek, ”Using Aspect Oriented
Modeling to localize implementation of executable models”, Models
and Aspects workshop, at ECOOP 2007
[10] Budinsky, F., Steinberg, D., Merks, E., Ellersick, R. and Grose, T.
Eclipse Modeling Framework. Addison Wesley Professional, 2003.
[11] J. Mitola, Cognitive Radio : Making Software Radios More Personal,
IEEE Personal Communications, August 1999
'-
:
$
"#0 -
504
-
"
, '< 0 '#0.
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
'-
$
"#0 -
504
-
-(
& ;!%
% &
, G
@
>(
6
H
9
6
9 K >
9
6
.$
,$
# #$
03
6 <
>
1 "
'-
'- "
%
%.
6
*
6
,$
'0#
9
'> 6
&
'
K
9
>(
1$ $
!
6
6
9
9
>
L
+
>
'
9
0
6
K
,
2 G
96
K
2
26
- '- 5 3$ '
9 &
6
A
"
:&
c ( ;d
9
H
'9
A
>&
>
9
+
BG,
P H<
6
A
S
>
3
2
>
6
,92
+
+
+
1
9
<
> ( &
9
>&
9
<
> - '- 5 3$
<
&
>
A
<
<
9
c ( @d
7
'
A
9
>
(
2
2
9
Q
,9
2
>
2
+
<
6
&
&
96
%'
'>
' '
)
&
2
96
c
= d
<
2Z/
Z.
,
&
2
c
==d
<
9
9 6 %
(
6
)
>
2
2
>
<
69
$ 9
9A
(
9
(
+
9
1 '#
0/15
&
9
&
2
> '
<
(
1 . 6
>
>
6
"
, '< 0 '#0.
%
'
+ 1")
9
B
504
6
Z/ %
%
)
/
1$ $
!
1 "
2&
:
#
Couche cachée
de (I) RBF
Couche de
(J) sorties
w1
f (γ , C1 )
x(t + mTe )
γ(1)
DS
P
ŷ1
100 kHz
f (γ , C i )
γ ( 0)
200 kHz
f(γ,CI −1)
γ(LTFD / 2)
f (γ,CI )
Band
adaptation
ŷ J
300 kHz
6M
&
BPc
8 MHz
wI
DS/FH
Detection DS / FH access mode with Time
frequency analysis
) .!
'-
+ /-)
1
>,
.
.$
,$
# #$
03
Logique de sortie
$
"#0 -
Prétraitement
'-
Detection Mono/ Multi carrier with
Cyclostationnarity techniques on the Guard
Interval
IG
! Q
&
Standard
"
&
& "
Channel bandwidth
&
F
U
S
I
O
N
R
Channel Filter
DCS 1800/DCS 1900
200 kHz
Gaussien 0.3
PDC
25 kHz
Nyquist 0,5
CT2
100 kHz
Gaussien 0.5
GSM
200 kHz
Gaussien 0.3
EDGE
200 kHz
Linearized Gauss
GPRS
200 kHz
Gaussien 0.3
PHS
300 kHz
Nyquist 0.5
1 MHz
Gaussien 0.5
Bluetooth (IEEE802.15.1)
IS95
1.25 MHz
FIR
Globalstar
1.25 MHz
RIF 48 coeff
DAB
1.712 MHz
Window
DECT
1.728 MHz
Gaussien 0.5
UMTS
5 MHz
Nyquist 0.2
DVB-T
7–8 MHz
DVB-S
32-36 MHz
RLAN
10 MHz
Nyquist
20 MHz
Gaussien 0,5
32-36 MHz
Nyquist 0.2
Hiperlan I
LMDS
Hiperlan II
50 MHz
Window
REC (Nyquist) 0,3
Window
IEEE802.11g
20 MHz
Window
IEEE802.11b
1 MHz
Gaussien 0,5
IEEE802.15.4
2 MHz
Gaussien 0,5
&;% ( M
&
"
&
7! % "&
, '< 0 '#0.
Standard
!
!
+
+
#
"
'- "
'-
504
$
"#0 -
'
-
.$
,$
# #$
03
1 "
2
+ >
%
. .
c
=;d 1= = c
=@d
9
Z B
6
2
"7
'-
'- "
A
1=
)
2
H
"
)
/ :
Z =
1B c
=:d
1
K
Z ?
"
2
9
>
G+
)),
/ B 3 Z'05`` ,
9
9
92
A
3 Z'05``
A
6
>
,
(
2
*
1$ $
!
9 + + A
,$
'0# 6 5 >
6
9
<
<
!-0VV$ c
=Bd
/# 81 !
<
2
9>
19
>&
9
6
2
(
>
6
,9
c ( d
c ( ;d
2
K
&
>&
92
"
,
>
, '< 0 '#0.
A
2
?
'-
$
"#0 -
504
-
"
, '< 0 '#0.
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
Dossier
LA PERCEPTION DES CHAMPS ÉLECTROMAGNÉTIQUES AMBIANTS :
UN 6ÈME SENS POUR L'HOMME ?
La bulle sensorielle radio
intelligente
I
Jacques PALICOT, Rachid HACHEMANI, Christophe MOY
Supélec / IETR - Campus de Rennes
1. Introduction
Commençons cet article par une analogie entre les
systèmes de radiocommunications transportant de l’information et le transport terrestre de biens. Le transport ferroviaire est un lien physique entre 2 gares. Le train sur
lequel se trouve le bien à transporter, doit suivre les rails
et ne peut pas choisir d’autres « routes » et d’autres horaires. Notre analogie consiste alors à dire qu’une communication radio classique sur un lien standardisé du type
GSM n’offre aucune autre possibilité que suivre les rails
(une fréquence et une modulation données) pour que l’information atteigne le destinataire. Lorsque ce même bien
est transporté via le réseau routier, il existe toute une
infrastructure qui permet de rejoindre ces 2 mêmes points
avec une liberté d’horaire. Poursuivons notre analogie, la
radio logicielle sera pour nous l’infrastructure équivalente
au réseau routier qui offrira un choix possible (sur la fréquence, la modulation…) pour transmettre l’information
de l’émetteur au destinataire. Sur ce réseau, le conducteur
1
Mots clés
Cognitive Radio,
Capteurs intelligents,
Radio logicielle,
Terminal universel,
Convergence
aura toute liberté pour choisir son itinéraire en fonction
de différents critères (temps de parcours, distance, coûts
des péages, trafic prévu, heure de départ, etc...). De la
même manière la radio intelligente permettra au terminal
(le conducteur) de se déplacer dans l’infrastructure radio
(grâce à la technologie radio logicielle) en ayant beaucoup plus de choix à sa disposition, grâce à l’information
donnée par la « bulle sensorielle ». La « bulle sensorielle »
est chargée de fournir au système radio intelligent des
informations sur l’environnement du système afin que
celui-ci puisse effectuer les meilleurs choix en terme de
modulation, fréquence porteuse, codage canal,… à un
instant donné, dans un lieu donné et pour un service
donné. Dans ce travail sur la définition du concept de «
bulle sensorielle » nous nous plaçons dans un contexte de
« handover 1 généralisé », comme illustré par la figure 1,
c’est-à-dire que les choix sont à faire parmi un ensemble
pré-déterminé, constitué des standards de communications sans fil présents à l’endroit et au moment de la communication. Ce concept, initialement proposé dans le
« Handover » correspond à une technique offrant une transition entre 2 points d’accès soit identiques (« Handover » horizontal) ou différents
(« Handover » vertical). Cette transition peut être instantanée ou douce.
L ’ E S S E N T I E L
La radio intelligente est une évolution de la radio logicielle. Plus
précisément cette dernière sera la technologie supportant ce nouveau concept de radio intelligente. La radio intelligente est un système de radiocommunications qui adapte son fonctionnement à
l’environnement (au sens le plus large du terme). Nous présentons dans cet article, le concept de « bulle sensorielle ». Suivant
ce concept, le terminal intelligent possède une bulle multidimensionnelle, dont le diamètre dépend, dans chaque dimension, du
capteur considéré. Elle donne au terminal toute l’information
nécessaire à la prise de décision adéquate quant à son fonctionnement optimal (en terme d’utilisation des ressources disponibles)
et sécurisé (en terme de QoS). Pour expliquer notre propos, nous
ferons des analogies avec la « bulle physiologique et psychologique » d’un individu ainsi qu’avec cette même bulle lorsque cet
individu devient un conducteur dans un contexte automobile. Par
ces analogies, notre but est de déduire des règles comportementales quant à l’utilisation des systèmes de radiocommunications.
Il semble clair que la radio intelligente ne sera pas opérationnelle
et disponible pour offrir les futurs services sur les réseaux des prochaines générations avant de nombreuses années, tant les difficultés techniques sont encore importantes.
S Y N O P S I S
Cognitive radio is an evolution of software radio. Software radio
is indeed an enabling technology for the new concept of cognitive radio.
A cognitive radio communication system matches its behaviour to
the environment (in its largest sense). This article introduces the
concept of the "Sensorial Bubble". A cognitive radio terminal has a
multi-dimensional bubble, the diameter of which depends, in each
of its dimensions, on the corresponding sensor. The bubble provides
the terminal with all the necessary information to take adequate
decisions concerning its optimal (in terms of available resources
use) and secure (in terms of quality of service) operation.
Analogies with the human "physiological and psychological bubble",
as well as with the "driver bubble", are provided to illustrate the
concept. These analogies aim in the long term at deducing behavioural rules for future cognitive radio terminals. It is obvious that
cognitive radio will not be a reality in the very next commercial
wireless radio networks, but it will take many years to solve technical challenges still remaining.
REE
N° 9
Octobre 2007
1
Dossier
Figure 2. La radio logicielle idéale.
Figure 1. Le Handover généralisé.
contexte de [8] et maintenant communément adopté dans
la communauté des radiocommunications, sous-entend
une double mobilité : une mobilité géographique classique nécessitant un « handover horizontal » pour les systèmes cellulaires et une mobilité entre réseaux, standards,
services. Cette dernière mobilité est offerte par le « handover vertical ». Celui-ci doit être réalisé en évitant les
coupures de communication pour l’usager. C’est avec
l’avènement des technologies radio logicielle que ce type
de « handover » est devenu véritablement envisageable
car une reconfiguration du terminal est nécessaire.
Le « handover horizontal » quant à lui existe déjà
actuellement dans les réseaux mobiles. C’est lui qui permet de se déplacer de cellule en cellule, mais il n’implique pas de reconfiguration du terminal.
2. De la radio logicielle à la radio intelligente
Le marché de la téléphonie mobile grand public tend
à intégrer de plus en plus de standards (GSM, GPRS,
EDGE, UMTS) et autres moyens de localisation (GPS). A
cela commence à s’ajouter la réception des réseaux de
diffusion (FM, DAB, DVB-H) et des réseaux locaux à
haut débit (WiFi). Il n’est pas envisageable économiquement de passer à ce stade avec les techniques habituelles
de duplication des composants électroniques pour chacun
des standards évoqués dans chaque produit. La radio logicielle vise à lever ce verrou. De plus, cette évolution technologique favorisera également l’insertion de nouveaux
moyens de communication dans d’autres domaines faisant de plus en plus appel aux systèmes communicants.
En particulier, une automobile intègre déjà plusieurs
moyens de communication et ce n’est qu’un début. A la
radio analogique (FM, GO), s’ajoutent désormais la téléphonie mobile, le GPS, le système de péage à distance.
En outre, de nombreux autres systèmes s’apprêtent à être
intégrés dans tous les moyens de transports routiers,
connus sous le nom ITS (Intelligent Transport System)
qui géreront de nouveaux systèmes radio tels que les systèmes anticollision, les systèmes d’information au guidage… Là encore, la radio logicielle offrira des solutions
à coût moindre et efficacité supplémentaire pour la mise
en commun de parties majeures de ces systèmes.
2
REE
N° 9
Octobre 2007
Figure 3. La radio logicielle restreinte : l’architecture
pragmatique dans l’exemple d’un récepteur.
2.1. La Radio Logicielle idéale (RL)
La figure 2 présente le schéma de référence de la RL
[1]. Les conversions s’effectuent tout de suite après l’antenne et l’architecture est entièrement reprogrammable
car le processeur de signaux est directement relié aux
convertisseurs. Les fonctions CAN et CNA haute fréquence et large bande seront très compliquées à réaliser et
constituent, encore aujourd’hui, une des difficultés
majeures de la RL. Il est à noter également que les
contraintes de performance de calcul imposées au processeur sont également extrêmes et que ceci est aussi un défi
technologique qui reste à relever. La consommation de
puissance de tels systèmes est un challenge à relever également dans le cas de systèmes embarqués.
La radio logicielle implique également la présence
d’un système de gestion de reconfiguration qui s’ajoute
aux organes de traitement du signal.
2.2. Les architectures actuelles : La Radio Logicielle
Restreinte (RLR)
On peut restreindre l’approche de la figure 2 à des
sous-bandes du spectre afin de limiter les contraintes sur
le matériel. Actuellement, les architectures des récepteurs
les plus couramment utilisées (récepteur superhétérodyne) ne permettent pas une interchangeabilité des fonctions radio. Comme cela a été précisé précédemment et
abondamment décrit dans la littérature, les contraintes
pour réaliser une radio logicielle idéale sont telles que les
acteurs du domaine ont proposé une radio logicielle restreinte dont le principe est basé sur l’architecture pragmatique [10] [11] présentée sur la figure 3 dans l’exemple
d’un récepteur.
Le FEA (Front End Analogique) conserve l’ensemble
des fonctions analogiques qui ne peuvent pas encore être
réalisées dans le domaine numérique. Ce sont par exemple certains filtres, les amplificateurs.... Ensuite vient
directement la fonction de conversion analogique numérique. Celle-ci est effectuée par le CAN (Convertisseur
Analogique-Numérique).
Enfin le traitement du signal du FEN (Front End
I
La bulle sensorielle radio intelligente
I
Figure 5. La radio intelligente en fonction d’un modèle
simplifié en 3 couches.
Figure 4. Un schéma simplifié du cycle intelligent.
Numérique) est effectué par un ou des composants programmables qui peuvent être de type logiciel (processeur)
ou matériel (reconfigurable : FPGA ; paramétrable :
ASIC), mais qui la plupart du temps combineront les deux.
L’essentiel est qu’ils offrent la plus grande flexibilité possible au système. Les fonctions effectuées par ce bloc correspondent à des fonctions auparavant effectuées en analogique, mais que la technologie actuelle permet de réaliser en
numérique. Parmi ces fonctions nous trouvons : le changement de fréquence d’échantillonnage, la transposition en
bande base, le filtrage du canal d’intérêt...
potentiellement peuvent utiliser ces canaux. Ensuite imaginons que le déplacement de chaque pièce n’est plus
préalablement défini mais peut se construire en temps
réel en fonction de l’environnement. Nous laissons au lecteur le soin de conclure sur le nombre infini de combinaisons possibles pour prédire la meilleure utilisation qui
peut être faite du spectre sur la durée, et sur la nécessité
de posséder une intelligence pour gérer ce problème :
c’est la conclusion à laquelle est arrivé J. Mitola en
1999. La dénomination « radio intelligente », que nous utilisons dans cet article, est connue dans la communauté
internationale sous la forme anglaise « cognitive radio ».
2.3. La radio intelligente
D’après la définition qu’il en a donnée, un système de
radio intelligente est capable d’adapter son fonctionnement à l’environnement grâce à :
• des capacités d’analyse de sa situation,
• une intelligence qui permet de prendre des décisions
appropriées par rapport à des critères établis ou appris,
• des capacités d’auto-reconfiguration pour adapter sa
fonctionnalité.
Ce cycle intelligent, déjà bien connu dans les domaines de l’IA et des réseaux de neurones est représenté de
manière simplifiée sur la figure 4.
Le contexte actuel de convergence entre systèmes
d’information et de communication contribue non seulement à l’insertion de fonctions de télécommunications
dans un nombre croissant de produits, mais également à
l’intégration de plusieurs moyens de communication dans
un même système. Un PC standard comprend désormais
une liaison Bluetooth (pour envoyer le son aux écouteurs,
par exemple), plusieurs standards de réseau local haut
débit (IEEE 802.11a, b et g) et a la possibilité d’intégrer
une carte 3G. On comprend l’intérêt, si ce n’est la nécessité dans certains cas (téléphone portable), de prendre en
compte dans la conception globale du système la multiplicité des moyens de communication par voie radio et de
gérer le spectre et ces différentes interfaces air de manière
intelligente. Il s’agit d’optimiser “ici et maintenant” l’utilisation de la ressource spectrale. C’est cette problématique de la gestion optimale du spectre qui est à l’origine
des travaux sur la « radio intelligente » [2] [3] [4] [5].
Pour illustrer cette problématique, prenons une nouvelle image. Nous supposons que le lecteur connaît les
règles du jeu d’échecs 2, et sait le nombre considérable de
possibilités. Supposons maintenant, qu’un nouveau jeu
d’échecs soit défini en remplaçant les 64 cases du damier
par plusieurs centaines de canaux spectraux. Remplaçons
les 32 pièces du jeu par plusieurs dizaines de signaux, qui
2
Comme une préoccupation essentielle actuellement
est le coût de la ressource spectrale (rappelez-vous le prix
exorbitant des licences UMTS) et l’optimisation de son
utilisation, la radio intelligente est souvent réduite à l’optimisation de la ressource spectrale. Cependant, notre
vision de la radio intelligente est plus générale que cela
[17]. Pour expliquer notre approche, nous avons volontairement découpé en 3 grandes couches notre modèle de la
figure 5 [16] :
• Une couche de haut niveau, qui regroupe essentiellement la couche application, ainsi que les interfaces
de type homme-machine.
• Une couche intermédiaire dans laquelle on retrouve
les couches transport et réseau.
Exemple proposé initialement par Mitola dans [3].
REE
N° 9
Octobre 2007
3
Dossier
• Une couche de bas niveau dans laquelle on retrouve
les couches MAC et physique.
L’ensemble de ces couches fonctionne sur une plateforme radio logicielle (si possible idéale, mais notre
modèle fonctionne aussi avec une plate-forme RLR). Ces
plates-formes radio logicielles reposent sur une architecture matérielle d’exécution, qui généralement est hétérogène 3. Cette plate-forme est abstraite à travers une couche
d’abstraction, qui offre une transparence en terme d’implantation de composants logiciels de traitement du signal
que l’on y exécute.
Dans ce modèle, nous avons dans la colonne de gauche fait figurer certains capteurs, cela sera rappelé plus
loin (cf. section 3.4.1). Dans la colonne de droite sont
cités les domaines de recherche relatifs à la couche en
question avec lesquels la radio intelligente entretient des
liens très étroits.
Bien entendu comme notre objectif est d’optimiser le
fonctionnement de ces 3 couches de manière intelligente,
la radio intelligente aura aussi un lien très proche avec le
domaine émergent de l’optimisation inter-couches.
3. La bulle sensorielle radio intelligente
3.1. Généralités
Le concept proposé dans cet article consiste en une
nouvelle approche dans laquelle le terminal voyage avec
une “bulle” qui lui est associée. Cette bulle lui assure une
sécurité de transmission, c’est-à-dire sans interférences
avec les autres usagers. A tout moment le gestionnaire de
la bulle connaît parfaitement tous les signaux entrants et
sortants de la bulle. Grâce à une analyse fine de situations
déjà vécues, le terminal peut prédire et anticiper les émissions radioélectriques, de manière à optimiser des critères
prédéfinis.
Figure 6. La Bulle sensorielle.
Il s’agit à partir d’informations fournies par des capteurs de tout niveau d’analyser le “comportement” des
3
4
signaux entrants et sortants de la “bulle” et de prédire
l’évolution de ce comportement.
3.2. Analogie avec la « bulle physiologique et psychologique »
La première analogie que l’on peut faire est avec la
“bulle physiologique et psychologique” humaine. Cette
bulle est l’espace virtuel dont les dimensions sont fixées
par les 5 sens. Une personne se sent à l’aise et en sécurité
dans cette bulle, ou au contraire peut se sentir agressée si
cet espace est violé. Elle connaît tout ce qui se passe dans
cette bulle et traite de nombreuses informations concernant ce volume, qui lui sont fournies par ses sens. De la
même manière, la bulle sensorielle radio intelligente est
dimensionnée par les capteurs qui analysent les ondes
électromagnétiques reçues. Suivant notre analogie, nous
pourrions parler d’un sixième sens fourni par l’analyse de
ces ondes électromagnétiques. Par exemple, un capteur
peut détecter des trous dans le spectre (zones non utilisées
à l’instant présent dans le lieu d’écoute), un autre peut
déduire quel est le standard de communication le plus
approprié (selon des critères qui restent à déterminer)
pour effectuer telle connexion à tel service (voix, visioconférence, téléchargement…). La bulle sensorielle radio
intelligente permet de transmettre ces informations aux
couches hautes d’un terminal radio intelligent qui peut
alors initier de manière autonome une phase de reconfiguration [15] afin d’adapter son fonctionnement à la
situation, suivant des règles pré-établies ou apprises. Afin
d’avoir un maximum de degrés de liberté pour s’adapter,
on comprend qu’un système radio intelligent tire un
grand bénéfice des technologies radio logicielles.
3.3. Analogie avec la « bulle du conducteur d’un
véhicule »
La seconde analogie que l’on peut faire est celle de la
« bulle du conducteur d’un véhicule ». C’est une extension
de la « bulle humaine » au contexte de l’automobile.
Désormais, la « bulle » (sphère virtuelle) que l’on considère est celle qui se déplace autour d’un véhicule et qui est
associée aux sensations de son chauffeur. Un conducteur
doit avoir un certain nombre d’informations sur son environnement et comprendre leur implication par rapport à
son activité de conduite. Ce sont ses sens qui lui communiquent ces informations. Dans notre contexte, l’information
entrant dans la bulle est augmentée à l’aide de :
• moyens de signalisation, de signaux émis par les
autres véhicules et par l’infrastructure routière,
• règles universelles associées à la conduite et appliquées par tous les conducteurs,
• l’expérience associée à des situations déjà vécues et
Hétérogène signifiant ici que la plate-forme d’exécution est composée de circuits DSP, GPP, ASIC, FPGA, ...
REE
N° 9
Octobre 2007
I
qui lui permet de prédire et d’anticiper les dangers.
L’exemple suivant illustre davantage encore l’analogie.
Le but d’un conducteur est d’aller d’un point à un autre
sans accident, en respectant certaines contraintes de temps
de trajet, de nombre de kilomètres, de prix,… Il effectue
tout cela grâce à l’ensemble des éléments dont il dispose à
chaque instant dans sa « bulle ». De la même manière, un
terminal radio intelligent a pour but de transmettre un certain contenu sans erreur avec certaines contraintes en termes de qualité de service, durée, débit, prix…
Nous voulons ainsi faire le lien entre la bulle radio
intelligente et la bulle d’un véhicule. Il faut donc étudier
des situations équivalentes et en déduire des règles pour
la radio, en s’inspirant des règles existantes déjà éprouvées pour la circulation automobile. Par exemple, tourner
à gauche ou à droite pour changer de rue est équivalent à
changer de standard. Il faut dériver toutes les règles de la
radio intelligente. Y a-t-il une priorité à droite par exemple ? Un challenge est d’étudier tous les capteurs capables
d’apporter des informations pertinentes à un système
radio intelligent afin qu’il dispose d’un maximum d’information.
La complexité des traitements à effectuer par la couche physique d’un système radio intelligent requiert une
architecture de gestion de reconfiguration performante car
il est constitué d’une pluralité d’unités de traitement de
types différents (DSP, FPGA, ASIC…). Le système doit
en outre constamment être capable de s’adapter en fonction des besoins de l’utilisateur et des conditions de l’environnement (point d’accès, sélectivité du canal de transmission, interférences, disponibilité en termes de ressources spectrales). Le rôle de la bulle est de fournir tous les
moyens à la couche physique d’un système radio intelligent pour qu’il puisse effectuer les changements nécessaires à son fonctionnement afin d’être adapté au mieux à
son environnement.
L’objectif de la RI est de donner l’autonomie aux systèmes vis-à-vis de leur utilisation du spectre. Un terminal
est assimilé à un conducteur de voiture guidant son véhicule dans l’infrastructure routière, mais pour le terminal,
c’est à travers le spectre fréquentiel qu’il se déplace.
3.4. Les capteurs de la bulle sensorielle
Le terme capteur est ici utilisé dans son sens le plus
général. Il correspond à tout moyen fournissant une information pertinente. Cela peut, bien entendu, être un capteur au sens classique du terme (microphone, caméra,
antenne...) aussi bien qu’un algorithme de traitement du
signal (estimation du canal, rapport signal sur bruit...). On
peut classifier ces capteurs suivant 3 alternatives décrites
ci-après. La première consiste à les classifier en fonction
des couches de notre modèle simplifié (cf. section 3.4.1).
La seconde possibilité est un classement en fonction de
l’environnement considéré et est décrit en section 3.4.2.
Enfin la troisième possibilité sera un classement proposé
I
en fonction de 2 types de cartes que nous définirons et
utiliserons au chapitre 4.
3.4.1. Classification en fonction de la couche du
modèle simplifié
Nous avons, dans le paragraphe 2.3, déjà présenté
notre vision de la radio intelligente en fonction d’un
modèle simplifié en 3 couches. Nous proposons dans
cette section un classement des capteurs en fonction de la
couche considérée. La classification qui en résulte est
présentée sur le tableau 1 suivant.
Capteurs
Couches
Profil utilisateur (prix, opérateur, choix
personnels), son, image, position,
vitesse, sécurité,...
Application et interface homme-machine
« Handover » vertical inter et intra
réseaux et standards, charge sur un lien, Transport, réseau
reconnaissance de standards,...
Type d’accès, modulation, codage canal,
fréquence porteuse, symbole, estimation
Liaison Physique
de canal, lobes d’antennes, puissance,
ressources matérielles disponibles,...
Tableau 1. Classement des capteurs en fonction du modèle
en couches simplifié.
3.4.2. Classification en fonction de l’environnement
considéré
Nous rappelons que, dans cet article, sont considérés
comme des capteurs, tous les moyens permettant d’observer l’environnement du terminal et de renseigner celui-ci
sur les modifications actuelles ou prévisibles de cet environnement. Nous proposons dans cette section de classer
les capteurs en quatre grandes familles : chaque famille
ciblant un type d’environnement particulier. Cette classification est présentée sur le tableau 2 suivant. Tous les stimuli reçus par les capteurs de ces différents environnements doivent être analysés et traités conjointement de
manière à obtenir la meilleure décision possible.
Dans ce tableau, nous avons illustré un cas particulier
d’environnement utilisateur. C’est un exemple souvent
cité dans la littérature du « Context Aware », il s’agit de
l’application médicale d’aide au diagnostic en temps réel,
pour des patients. Cet exemple fait bien entendu appel à
des capteurs particuliers, adaptés à l’application souhaitée. La liste présentée n’est bien sûr pas exhaustive et peut
dépendre de la pathologie du patient.
Dans ce même ordre d’idées, nous pourrions prendre
d’autres exemples applicatifs tels que l’aide aux services
de sécurité (police, pompiers). Dans ce cas d’autres capteurs se révéleraient nécessaires.
REE
N° 9
Octobre 2007
5
Dossier
Classes
Capteurs
Environnement électromagnétique
Rapport signal à bruit,
Occupation spectrale
Détection de « trous » dans le spectre
Propagation,
Puissance reçue,
Réponse impulsionnelle du canal,...
Environnement réseau
Nombre de points d’accès,
Nombre d’utilisateurs,
Charge sur un lien radio,
Reconnaissance de standards,
Opérateurs disponibles,...
Environnement matériel
Niveau de batterie,
Consommation,
Niveau d’utilisation des ressources
(nombre de portes dans un FPGA,
mémoire nécessaire)
Environnement de l’utilisateur
Capteurs audio et vidéo,
Le profil utilisateur,
Choix personnels
Position, Vitesse,
Température extérieure, Luminosité,...
Cas d’école : contrôle médical
Environnement de l’utilisateur
Température de l’utilisateur
Pression sanguine,
Niveau de glycémie...
Tableau 2. Classement des capteurs en fonction
de l’environnement.
4. Les cartes spatiales et spectrales
Grâce à l’information fournie par la bulle, nous pouvons construire 2 cartes. La première est une carte spatiale classique sur laquelle nous reportons l’information
spatiale pertinente fournie par certains capteurs.
La seconde carte que nous construisons est totalement artificielle. Nous y reportons toute l’information
spectrale donnée par certains capteurs. Notre objectif est
de construire une carte dans laquelle le terminal peut se
déplacer dans l’environnement spectral. Nous souhaitons
poursuivre nos analogies en imaginant une nouvelle analogie entre le terminal se déplaçant sur cette carte et un
conducteur se déplaçant sur une carte routière. Un objectif complémentaire à ce niveau, serait de déduire de cette
nouvelle analogie des règles d’utilisation du spectre à partir des règles du code de la route.
4.1. La carte spatiale
Sur la carte spatiale est reportée l’information utile donnée
par certains capteurs... Cette information est par exemple, la
position des stations de base, la position des autres terminaux... Un exemple d’une telle carte est donné sur la figure 9.
4.2. La carte spectrale
Comme déjà précédemment expliqué, l’information
fournie par un grand nombre de capteurs va être mise à
profit pour construire une nouvelle carte appelée « carte
6
REE
N° 9
Octobre 2007
Figure 9. Un exemple de carte spatiale.
spectrale ». La construction d’une telle carte requiert un
certain nombre de règles. Ces règles sont à nouveau
déduites d’analogies avec la carte spatiale. Un exemple
d’analogie expliquant simplement et clairement cette
approche est le suivant. Sur cette carte spectrale, les routes seront les standards. De la même manière qu’une
autoroute permet d’écouler un trafic plus important qu’un
chemin vicinal, un lien radio Wifi permettra d’écouler un
débit plus important qu’un lien GSM. Pour cette raison la
largeur de la route tracée sera significative du débit
écoulé par le lien radio considéré. Nous avons donc établi
de nombreuses règles de construction de cette nouvelle
carte spectrale. Certaines sont données dans le tableau 3
ci-après. La carte ainsi construite peut être adaptée en
fonction de l’application et des besoins des utilisateurs.
Le terminal peut choisir de ne s’intéresser qu’à une partie
de la carte en fonction d’un objectif prédéterminé. Cette
carte est donc un outil représentant certaines des informations accessibles dans la bulle grâce aux capteurs.
Quelques exemples de cartes obtenues avec des scénarios
les utilisant sont présentés au paragraphe 5.
Remarque : Il est très important de noter ici que cette
carte spectrale évolue constamment en fonction du déplacement du terminal dans la carte spatiale. La carte spectrale a une existence limitée en temps et position.
Carte routière
Conducteur
Véhicule
Type de route
Largeur de la route
Nom de la route
Route à sens unique
Vitesse du véhicule
Bifurquer (changer de type de route)
Bifurquer sur le même type de route
Route interdite
Police
Voie privée
Limitation de vitesse
Priorité à droite
Embouteillage
Carte spectrale
Gestionnaire du terminal
Terminal
Type de standard
Débit du standard
Opérateur
Diffusion
Débit du terminal
« Handover » vertical
Changer d’opérateur pour le même
standard
Bande interdite
Organisme de régulation
Bande réservée
Débit alloué limité
Accès prioritaire à un standard
Lien radio surchargé
Tableau 3. Exemples de règles pour construire la carte spectrale.
I
I
UN EXEMPLE DE CAPTEUR : LE CAPTEUR DE RECONNAISSANCE AVEUGLE
DE STANDARDS
Pour illustrer cette bulle sensorielle intelligente nous présentons un exemple de capteur que nous étudions
actuellement [12]. Celui-ci comporte les trois phases présentées sur la figure 8.
Dans une première phase, le signal large bande reçu est analysé de manière très simple et très grossière de
manière à déterminer les plages de fréquence contenant une énergie significative. Cette analyse est effectuée
de manière itérative telle que le montre la figure 7.
Ensuite dans les bandes sélectionnées une analyse beaucoup plus précise est effectuée, durant la seconde
étape. Cette analyse permet d’accéder à des informations comme la bande passante du canal, la distinction
entre signal mono et multi porteuse, le type d’étalement de spectre... Dans cette seconde étape, la bande passante du canal, qui a été précédemment identifiée comme étant un paramètre discriminant, sera déterminée
suivant une méthode publiée dans [6], que nous avons améliorée en prenant en compte de nombreux standards
IEEE 802.x apparus depuis [6]. Notamment, le résultat d’une analyse temps/fréquence permet de distinguer
le saut de fréquence de l’étalement par séquence directe et est directement utilisé pour discriminer Bluetooth
du Wifi IEEE 802.11b, deux standards qui ont des caractéristiques très proches [14]. De même, l’étude des
cyclostationarités sur la bande étudiée donne des informations sur l’absence ou la présence d’un signal de
télécommunications [13], ainsi que sur les caractéristiques de ce signal éventuel (type de modulation...).
Chacune de ces informations n’étant pas suffisamment significative en soi, une fusion de l’ensemble de
ces informations est réalisée en troisième étape. Le résultat en sortie de l’organe de fusion est une décision la
plus fiable possible sur le standard présent dans la bande considérée. La manière la plus simple de réaliser
cette fusion est d’appliquer des règles logiques mais n’est pas très robuste. Des solutions plus évoluées basées
sur des réseaux de neurones sont actuellement à l’étude.
Figure 7. Détection itérative de
l’énergie.
Figure 8. Le capteur reconnaissance aveugle de standard.
REE
N° 9
Octobre 2007
7
Dossier
Figure 10. Choix du standard en fonction du coût.
Figure 11. Equivalence voie privée: standard réservé.
4.3. Classement des capteurs suivant le type de carte
La carte spectrale est un outil basé sur l’information
de la bulle de manière à être utilisé par le terminal en
s’inspirant de règles du « code de la route ». De nombreux capteurs contribuent à construire la carte spectrale.
Le terminal peut utiliser d’autres informations de la
bulle (telles que : le nombre de terminaux détectés, leurs
positions relatives, la position des stations de base...) et
positionner sur la carte spatiale ces informations, de
manière à les utiliser intelligemment (conjointement ou
non avec la carte spectrale). Nous pouvons donc classer
les capteurs suivant le type de carte.
Type de carte
Capteurs
Carte spatiale
Direction d’arrivée des signaux.
Position sur la carte.
Position des stations de base et des
points d’accès.
Position et distance vis-à-vis
des autres terminaux...
Carte spectrale
Fréquence porteuse, fréquence
symbole, occupation du spectre,
reconnaissance de standards,
de modulation, de l’intervalle
de garde, du codage, de l’accès,
du nombre d’utilisateurs,...
Tableau 4. Classement suivant le type de carte.
5. Quelques analogies avec des situations
routières
Nous présentons dans ce paragraphe trois exemples,
très simples, d’analogies entre un véhicule confronté à
une situation routière particulière et un terminal
confronté à une situation analogue dans la nouvelle carte
spectrale telle que définie précédemment.
Le scénario de la première analogie décrite sur la figure
10 consiste à faire un choix entre une communication à haut
débit et un coût faible de transmission. Comme cela a été
défini dans le tableau 3, la largeur de la route sur la carte
spectrale est directement reliée au débit du standard. En
fonction des besoins utilisateurs, le terminal choisit ici la
route la moins chère, celle qui correspond à l’opérateur 3.
8
REE
N° 9
Octobre 2007
Figure 12. Equivalence embouteillage=encombrement spectral.
La seconde analogie de la figure 11 considère une
route débouchant sur une voie privée interdite. Pour notre
terminal cela signifie que ce standard est réservé. Il peut
s’agir, par exemple, d’un réseau professionnel. Dans cette
situation le terminal bascule obligatoirement sur la voie
de l’opérateur 1.
Dans le troisième scénario présenté sur la figure 12,
une équivalence entre un embouteillage routier et une surcharge d’un lien radio est présentée. Dans cet exemple,
nous avons à la fois la diminution de vitesse imposée par
l’embouteillage (équivalent à la diminution du débit du
terminal liée à la diminution de la ressource disponible) et
le fait que le terminal change de route donc de standard
(en fait ici, d’opérateur) pour éviter cet embouteillage.
6. Conclusion
Nous avions conclu un précédent article REE sur la
radio logicielle [7] par la phrase suivante :
« Enfin, une orientation importante à nos yeux est la
volonté d’aller vers une RL de plus en plus « intelligente »
(la Cognitive Radio en anglais) ». C’est cette radio intelligente que nous avons présentée dans cet article, à l’aide
d’un nouveau concept : la « bulle sensorielle ». Celle-ci,
grâce à de nombreux capteurs de toute nature, fournit des
informations qui sont utilisées par le terminal intelligent.
Nous avons expliqué comment il peut, par exemple, adapter
son comportement à l’aide d’une carte spectrale construite
I
à partir des informations fournies par la bulle.
A l’aide d’analogies routières, nous proposons de
déduire des règles de « déplacement » sur cette carte
spectrale. Cela est en cours d’étude et fera l’objet de futures publications.
7. Remerciements
Ce travail a été effectué dans le cadre du projet E2R II
qui est financé par le Sixième Programme Cadre de la
Commission Européenne. Ce document n’exprime que le
point de vue des auteurs et n’engage en rien la
Communauté dans l’utilisation qui peut en être faite. La
contribution de collègues du consortium E2R II est ainsi
remerciée.
Les auteurs remercient aussi la Région Bretagne, pour
le support financier apporté à ce travail.
Références
[1]
I
J. MITOLA, “The software Radio Architecture”, IEEE Communications Magazine, May 95, pp. 26-38.
[2]
J. MITOLA, “Cognitive Radio: An Integrated Agent Architecture for Software Defined Radio”, Ph.D. dissertation,
Royal Inst. of Tech., Sweden, May 2000.
[3]
J. MITOLA & al., “Cognitive radio: Making Software Radios
More Personal”, IEEE Personal Communications., vol. 6, no. 4,
pp. 13-18, August 1999.
[4]
S. HAYKIN, “Cognitive Radio: Brain-Empowered Wireless
Communications ”, IEEE Journal on Selected Areas in
Communications, vol. 23, no. 2, pp.201-220, February 2005.
[5]
F. K. JONDRAL, “Software-Defined Radio-Basics and Evolution to Cognitive Radio”, EURASIP Journal on Wireless Communications and Networking 2005:3, 275-283.
[6]
C. ROLAND, J. PALICOT, “A New Concept of Wireless
Reconfigurable Receiver”, IEEE communications Magazine,
July 2003.
[7]
J. PALICOT, C. ROLAND, « La Radio logicielle : Enjeux,
contraintes et perspectives », REE n° 11, décembre 2001.
[8]
http://www.wireless-world-research.org/
[9]
http://www.gnu.org/software/gnuradio/
[10] T. HENTSCHEL (Dresden University of Technology), SORT,
Contribution : SORT/WP3100/TUD/002.
[11] TRUST, IST-1999-12070, D3.1.1, “Proposal and Initial
Investigation of Certain Known and Augmented Analogue
Signal Processing Techniques for Future Flexible Transceiver
architectures”, 29 September 2000.
[12] R. HACHEMANI, J. PALICOT & C. MOY, “A New Standard
Recognition Sensor for Cognitive Radio Terminal”,
EUSIPCO’07, Poznan, Poland, September 2007.
[13] M. GHOZZI, M. DOHLER, F. MARX & J. PALICOT, “Cognitive Radio: Methods For The Detection of Free Bands”,
Comptes-Rendus Physique, Elsevier, volume 7 September
2006, pp 794-804.
[14] M. GANDETTO, M. GUAINAZZO & C. S. REGAZZONI. “Use
of Time-Frequency Analysis and Neural Networks for Mode
Identification in a Wireless Software-Defined Radio
Approach”, Eurasip Journal of Applied Signal Processing,
Special Issue on Non Linear Signal Processing and Image
Processing, 13:1778–1790, October 2004.
[15] L. GODARD, C. MOY & J. PALICOT, “From a Configuration
Management to a Cognitive Radio Management of SDR
Systems”, IEEE CrownCom’06, 8-10 June 2006, Mykonos,
Greece.
[16] A. NAFKHA, R. SÉGUIER, J. PALICOT, C. MOY &
J. P. DELAHAYE, “A Reconfigurable Base Band Transmitter
for Adaptive Image Coding”, IST Mobile SUMMIT 2007, 1-5
July 2007, Budapest, Hungary.
[17] C. MOY, A. BISIAUX & S. PAQUELET, “An Ultra-Wide Band
Umbilical Cord for Cognitive Radio Systems”, PIMRC’05,
Berlin, September 2005 (IEEE Personal Indoor and Mobile
Radio Communications).
Les auteurs
Jacques Palicot a obtenu en 1983 le diplôme de Docteur en traitement du Signal de l’université de Rennes. De 1988 à 1991, il a
travaillé sur les techniques d’égalisation appliquées aux nouveaux
systèmes de télévision améliorée, pour le centre de recherche de
TDF à Rennes : le CCETT. Ensuite, depuis 1991 ses études concernent le domaine des communications numériques et tout particulièrement le domaine du traitement adaptatif du signal.
Depuis 1998, il s’est particulièrement investi dans les nouvelles
technologies connues sous le nom de “Software Radio” et
« Cognitive Radio ». Depuis octobre 2003, il est Professeur et responsable de l’équipe de recherche Signal Communications &
Electronique Embarquée (rattachée à l’IETR, entité CNRS-6164), à
SUPELEC sur le Campus de Rennes.
Rachid Hachemani a obtenu le diplôme d’ingénieur en électronique de l’université des sciences et de la technologie d’Oran en
2003. Il a obtenu en 2006 le diplôme de Master2 recherche MARS
(Systèmes et réseaux, Architecture et micro-technologie) de
l’Institut National des Sciences Appliquées de Rennes. En thèse de
doctorat à l’École Supérieure d’Électricité : Supélec, au sein du
laboratoire SCEE, il travaille actuellement sur le domaine de la radio
logicielle et de la radio intelligente. Une partie de ses travaux de
thèse sont une contribution au projet Européen E2R (End-to-End
Reconfigurability).
Christophe Moy a obtenu en 1995 les diplômes d’ingénieur et de
DEA de l’INSA de Rennes. Il a effectué son doctorat dans le
domaine des techniques à étalement de spectre et a obtenu le titre
de docteur de l’INSA de Rennes en 1999 dans la spécialité électronique. Il a ensuite travaillé 6 ans dans le laboratoire de recherche
européen de Mitsubishi Electric à Rennes. Il a participé à la création et à l’activité de l’équipe « Software Radio » et a été le représentant de Mitsubishi Electric au SDR Forum de 1999 à 2004.
Depuis 2005, il est enseignant/chercheur à Supélec, sur le campus
de Rennes, et effectue ses recherches dans l’équipe SCEE de
Supélec. Il est impliqué dans les activités de radio logicielle
(« Software Radio ») et de radio intelligente (« Cognitive Radio »).
Il a participé et participe encore à de nombreux projets collaboratifs
tels que le projet intégré E2R et le réseau d’excellence Newcom de
l’IST au niveau européen, les projets RNRT A3S, IDROMel et RNTL
Mopcom au niveau national et du pôle de compétitivité Images &
Réseaux de Bretagne.
REE
N° 9
Octobre 2007
9
'-
$
"#0 -
504
-
"
, '< 0 '#0.
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
'-
$
"#0 -
504
-
6
"'
"
1
9
9
2
<
>&
6
+
2
6
1$ $
!
1 "
'-
'- "
"
A
<
.$
,$
# #$
03
6
>
>
>
A
>
"
>
A
2
9
>
>
<
+
2
9 >
<
6
<
2
+
2
9
E
9
9>
<
6
&
96
9
<
( '$
/
6 A
>&
5
,$
03 '9
> 6
A
>
<
,
A
6
+
+
<
6
N
K
9
G
+
9
K
<6
>
<
9
&
>
>
K
+6 6
+
6
P,
9
(
1
K
A
H
2
9
6
6
A
9
G
>
( &
,
6
+
9
>
>
6
6
>
6
(
6
)<
A
K
9
%
)
"
, '< 0 '#0.
>
>
A
6
9>
6
H<
9
9
<
%
K
9
6
2
6 <
9
6
92
H
< ( &
9
6
+
+
6
>
2<
6 <
6 <
2
,9
+
>
9
0
(
2+ K
<
G
9
$#
6
)<
%
O
&
'-
504
$
"#0 -
-
G
&
5
2
"
>
+
>
, '
%
&
'
%
'-
9
> '9
6
> >
>
A9
. $
A
A
<
<
6
G
H
"
, '
A9
+
+
+
>
'
5 >
+
6 <
6 <
C
6
,
>
&
G'
9
6
"
( &
H
6
<'
"( &
)
=&
6
>
>
>&
="
5
9
>
> 5
"
>
9
9
9
<
5
9
6
6
5
Q
> >
> >
'
&
> ,9
*9
"1 /
> >
9
$
A
,
2 &
, '
>
"
(
2
, '
2
<
'" %
" '
>
'- "
6
<A
6
1 "
6
>
<6
>
"
1$ $
!
>
+
>
>
"
.$
,$
# #$
03
9
B
'9
96
>
6
(
, '
"
9
)
%
,
9
- ,
, '
"1
'
)
&
%
9
/
=
6
6
<
96
'"
1
9
6
"
, '
>&
2
"
>
)
)
2
"2 %
"
, '
1
(
)
2 %
9
1
9
A
A
9
9
<
&
9
=
B $
5 3-$ %,
&
9
8
>
,
&
A (,
=&
=
6
9
A
( &
,96
+
<
" &
'
A
)
'"
( > 6
"
, '< 0 '#0.
%
<
9
O
69
9
F
'-
504
$
"#0 -
-
H
&; % # @
7!
#
<
> >
6
,
&
#' %
(
<
6
>
9
&
.$
,$
# #$
03
1$ $
!
$
1 "
! &;%
>
6
L
9
<
2
'
6
9
,
<
2
'
9
6
<
6
>6
J
39
> 6
> >
)
6
<
9
<
>
>
+
+
,
(
'
6
,
A
K
9
>
9
$
<
O
>
6
<
( &
2
6
%
<
<
M
9
'
'- "
<
'
>
'-
> 6
<
9
6
>
6
%
> >
'9
> >
( &
'9
5K
< 9
K
,9
9
69
(
)
6
+ +
6
2
9
6
&
<
9
9
9
K
(
6
9
< 9
Q
2
%
69
'
69
9
<
+ +
2
&6
H
6
P #
&
G
,9
) 3
+
<
6
A
2
9
6
>
<
<
6
/
2
6
$
9
6
6
9
9
9
(
6
(
,
+
6
69 A
<
&
9
>
,
6
"
, '< 0 '#0.
2
K
>
=
'-
504
$
"#0 -
-
I
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
"%
!
,
2
&
$
2
<
( &
2
6
<
6
N$
9 >
6
2
'
9
<
6
6
9
2
<
69
&
6
,
9
>
6 <
96
"'
(
9
6
&
9
"'
%;
>
"
>
< 6
9
6
A <
>
96
<
6
6
>&
'9
<
9
>
)
6
,
6
A
96
,
*
96
>
2
-1
>
6
(
<
> >
,$
'0# .
6
\<
<
'
+
&
&
&
'
&
>
$
9
>
9
2 9
9
6
#
2
6
>
9
2
9
'
+
9
>
9
>
2
>
9A
6
2
6
2
2
A9
6
9>
6
9 A
<
9
2
<
2
<
>&
>
2
6
*
9
2
<
6
2
<
2
2
9
>
>
,92
6
K
>
9
>
9
*9
O (
>
9
>
5
>
A
<
6
69
,9
>
>
<
1 1
5
9
<6
+
3
>
;
/
6
"
>
, '< 0 '#0.
>
9
(
9
'-
$
"#0 -
4
504
-
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
0
A
>&
"
() '*+
%
, '< 0 '#0.
'
" '
*
@
+0
:
# "
0
5 $
0 (1#/#"("#
/ +<
+" 1
! ( 3# #)
3
+0
+0
> EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing <
1
High-Level Design for Ultra-Fast Software
Defined Radio Prototyping on Multi-Processor
Heterogeneous Platforms
Christophe Moy, Mickaël Raulet
Abstract—The design of Software Defined Radio (SDR)
equipments (terminals, base stations, etc.) is still very challenging.
We propose here a design methodology for ultra-fast prototyping
on heterogeneous platforms composed of GPPs (General Purpose
Processors), DSPs (Digital Signal Processors) and FPGAs (Field
Programmable Gate Array). Lying on a component-based
approach, the methodology mainly aims at automating as much as
possible the design from a functional verification to a multi-processing heterogeneous implementation. We insist that a
complete HW/SW co-design approach is still currently not viable
with any technique. The proposed methodology is based on
SynDEx CAD design approach originally dedicated to networks
of multi-GPPs. We show how this has been changed to make it
first coherent with an embedded context of DSP for instance. The
implication of FPGAs is then addressed and integrated in the
design approach with very few restrictions. Indeed, apart from a manual
HW/SW partitioning, all other operations may be kept
automatic in a heterogeneous processing context. The targeted
granularity of components to be assembled in the design flow is
that of the size of a FFT, a filter or a Viterbi decoder for instance.
The re-use of third party or pre-developed IPs is a basis of this
design approach.
Moreover, the methodology is capable of mixing in the same
design the processing of radio as well as application layers,
merging and factorizing the most computation-demanding
processing elements of an SDR equipment. In addition, the
proposed design methodology is not restricted to SDR system
design but can be useful for any real-time embedded
heterogeneous design in the context of prototyping.
Index Terms—Software-defined radio, design methodology,
heterogeneous platform, cross-layer design
I. INTRODUCTION
This paper proposes a method that meets most of the
requirements for the design of Software-Defined Radio
(SDR) equipments. Research on SDR aims at investigating all
the topics that could help improving enabling technologies for
future radio systems [1] [2] [3]. We address in this paper the
particular topic of SDR equipments design, which is still a
Manuscript received March 7, 2008.
C. Moy is with SUPELEC/IETR, Rennes, France. (phone: +33 2 99 84 45
84; fax: +33 2 99 84 45 99; e-mail: [email protected]).
M. Raulet is with INSA/IETR, Image Group, Rennes, France. (e-mail:
[email protected]).
very open subject. This is also very similar to any real-time
embedded heterogeneous design issue such as image
processing for set-top boxes for instance. SDR design is
indeed a co-design issue that is not restricted to SDR, but also
concerns most of embedded real-time equipments. If we could
discriminate SDR from other equipment categories, we could
say that SDR brings the flexibility paradigm at its highest
extremity. That is why SDR equipments are expected to be
composed of a heterogeneity of flexible processing
components, such as DSPs, GPPs, FPGAs and ASICs.
Moreover, the flexibility is not only considered at design time
in SDR systems, but also at running time. An SDR system
consequently should be capable of taking benefit from the
potential reconfigurability of the flexible components it is
composed of. This implies an adequate design methodology
that transforms this potential reconfigurability in effective
flexibility at run-time [4].
The existence of a reconfiguration management architecture
at the level of an equipment, and adequate reconfiguration
management infrastructure at the system level [5] must not be
forgotten. However reconfiguration management is not directly
included in the design methodology of this paper, but we show
how the approach is compatible to its insertion, in a future
step. This assertion relies on our deep experience of the
reconfiguration management issue in [6] and [7]. Reves et al.
in [8] are also investigating ways to solve these challenges
with the same philosophy.
There are several ways of considering SDR design. The
approaches can be divided into two distinct categories:
- fast prototyping for lab demos,
- commercial equipments design.
Time has not come yet to consider the second category as
realistic now. Only sub-parts of the design flow are achieved
and they are mainly common with the prototyping flow. We
particularly address in this paper the topic of fast prototyping.
Usual heterogeneous embedded design approaches mix several
independent tools dedicated to hardware or software, which
requires a long effort and is a likely source of errors. It
is to answer this issue that we tackle the problem from a higher
level in order to bring automation and acceleration to the
design process.
The paper is organized as follows. The requirements of
heterogeneous co-design as addressed in SDR area are
explained in section II. But there are many ways of addressing
the concerned co-design issue. This is explained in section III
of this paper and section IV extracts the main features of a
realistic flow. Section V proposes a possible instantiation of
this flow and illustrates how the proposed heterogeneous
approach answers the requirements of section III. Finally, SDR
design examples based on the proposed approach are given in
section VI, before drawing some conclusions.
transmission side. Consequently, multi-processing is an
intrinsic requirement of SDR to speed up the required
computations.
RF
II. SDR DESIGN ISSUES
SDR design is very challenging and no solution covers all
the expectations of radio engineers. This section intends to list
the main issues to be solved. It can be noted that other
embedded systems have the same requirements, which does
not restrict our approach to SDR. Image processing for
example is completely in the scope of the proposed
methodology.
The ideal SDR design methodology should offer a set of
features facilitating the design of systems having the following
main characteristics:
- flexibility,
- multi-processing,
- heterogeneous processing,
- object-oriented design facilities,
- HW-SW co-design,
- embedded constraints,
- signal-processing simulation,
- hardware abstraction.
Moreover, portability and re-usability are a constant
concern, as well as the elevation of abstraction in order to
simplify the global design process. This list is not exhaustive
but it is sufficiently challenging to be considered as a very
ambitious target.
A. Flexibility
The salient feature to be considered first for SDR is
flexibility. All the previously listed elements have to be
considered in light of this keyword. Radio design has been
based for more than a hundred years on analog electronic
components, which impose a pre-defined transmission scheme.
SDR is the answer to this limitation thanks to the latest
technological progress.
Flexibility is achieved through a digital approach. The radio
applications become digital (the ideal would be software) and
can be played in any hardware platform. The radio application
(software here) consequently can be changed, just like a piece
of software that can be changed on a computer. This is the end
of a century of fixed-behavior radio. This is the software radio
era.
B. Multi-processing
SDR is defined as an underdeveloped version of the ideal
Software Radio of Fig. 1 [1]. Software Radio encapsulates
very demanding digital signal processing capabilities since it
brings closer to the antenna the analog-to-digital conversion at
the reception side, and the digital-to-analog conversion at the
2
RF
Fig. 1 – Ideal software radio equipment architecture
Software radio is currently not realistic except for low
carrier frequency transmission systems. More often, at least a
stage of frequency translation from radio frequency (RF) in
intermediate frequency (IF) or baseband (BB) is done in
analog between the antenna and the conversion. This
pragmatic approach illustrated in Fig. 2 is called Software-Defined Radio or SDR.
IF
RF
BB
IF
RF
BB
Fig. 2 – A realistic SDR equipment architecture
Even if it relaxes the digital processing components, this
still remains very challenging. The constraints are so strong
that the direct approach of Fig. 2 may fail and imposes
sometimes to investigate signal processing alternative
solutions in the most demanding contexts, as for UWB for
instance in [9] and [10]. Nevertheless, in the most relaxed
situation of baseband digitization, digital operations are timed
at a multiple of the signal bandwidth, which can reach tens of
MHz in a single band terminal for UMTS, and hundreds of
MHz for multi-band base-stations. A multiple of GHz will
soon be required for multi-standard SDR base-stations or
UWB transmissions. Most demanding operations are those that
are over-sampled compared to the symbol rate, as filtering at
the exit of the transmitter or the entrance of the receiver, as
well as the very complex algorithms in reception, such as turbo
decoding.
Consequently, digital processing architectures are very
complex and combine several devices of the same nature or
several kinds of processing devices. This last consideration is
the topic of next paragraph. But multi-processing, without
heterogeneity, is a topic itself. An SDR design methodology
must provide facilities to map and program a distributed
system. Designing SDR equipment means allocating software
elements, composing the radio application, on the hardware
devices of the equipment.
It has to manage communications as well as processing. It
has to schedule communication and processing periods of each
processing unit. This includes means of scheduling prediction
and optimization in order to perform an automatic mapping
and scheduling of the SW application to the HW platform.
Moreover, from this follows the demand of a generation of the
communication glue associated with the processing operations.
C. Heterogeneous computing
The heterogeneity of devices indeed permits to take benefit
of the different advantages of each category of processing
components. Let us keep in mind that flexibility is of major
interest for SDR. Consequently, most popular processing
devices in SDR design are the following:
- GPPs,
- DSPs,
- FPGAs,
- digital ASICs,
- analog ASICs.
As explained earlier, analog ASICs are still necessary. SDR
cannot get away from antennas, amplifiers and analog filters,
which are a condition of the transformation from the electric to
the electro-magnetic world. Digital ASICs and FPGAs are able
to process heavily parallel computing. ASICs are a hardware
solution particularly suited for low power, very high speed
computing. But they are dedicated to high quantity markets in
order to compensate the chips design cost. FPGAs provide
some of the hardware (HW) capabilities at lower buying and
developing cost. But FPGAs offer HW flexibility, that is most
important for SDR. Two levels of flexibility may be
considered for FPGAs. First a flexibility at design time which
permits to make several designs in a FPGA and run them at
different periods of time. This has been the usual manner to
use FPGAs for years and it is very profitable. But a new
opportunity is given by FPGAs now, which consists in
dynamically reconfiguring a sub-part of the gates of the FPGA
while the rest of the gates can pursue its processing. Delahaye
et al. have defined and validated a design methodology as well
as flexible processing elements design approaches particularly
interesting for SDR using partial reconfiguration of FPGAs
[11]. Other domains are also making investigations, as for
crypto [12], but this is out of the scope of this paper.
DSPs offer the best trade-off between processing power and
power consumption [14]. They are particularly well suited to the
SDR context, all the more so as manufacturers integrate more
and more co-processing capabilities dedicated to radio signal
processing such as Viterbi decoders in the TI C6416 for
instance. Their capability to be programmed in C language
makes them really pertinent for SDR, enabling portability from
one DSP device to another.
GPPs have become a potential alternative for a few years
with a tremendous increase in their computation power. GPPs
3
have the advantage of supporting high-level programming
languages, as well as highly advanced operating systems. This
last feature may be helpful for portability but also for
reconfiguration management.
D. HW-SW co-design
The choice of the repartition of the processing elements
between the different categories of processing devices has to
be done depending on the capabilities of each category of
components in terms of processing power, but also power
consumption, heat dissipation, cost, etc. This topic is very
complex and no automatic solution exists today. It remains
mostly based on engineers’ experience. The main challenge is
to separate software (SW) from hardware (HW). This should
be understood in the sense that we call SW something that runs
on a processor, and HW what is executed on a FPGA. We choose
this distinction in the rest of the article.
By HW-SW co-design, we have to restrict here to the
capability of the design flow to integrate at a high-level,
processing elements of different nature in code, such as C or
VHDL, to be run in different categories of processing
components, such as GPPs, DSPs, FPGAs or ASICs. We also
take into consideration the specific case of parameterizable
ASIC since, if the processing is fixed, the configuration of
their parameters may be done on a DSP within a C function. In
this sense, an ASIC is considered as a processing unit running
a dedicated functionality.
E. Object-oriented design facilities
SDR is an evolution of the radio design. Radio design is
coming from the electronics field which is very much linked
with the notion of hardware component. This is mainly due to
technology reasons. Electronics components have been for a
century the only bricks available to build a radio. On the other
side, computer engineering integrated the electronics paradigm
of “components” in the SW domain. This makes a component-based design approach [15] definitely suitable for SDR design
demand [6]. Advantages are numerous, but let us highlight the
following at least: modularity, portability, replacement by
reconfiguration, re-usability, etc. The reason is that the component-based approach brings a natural separation between signal
processing and execution architecture.
The object-oriented approach indeed gives the opportunity
to increase the level of abstraction of the design. This has two
main consequences. First, SDR design may take benefit of the
latest progresses in high-level design. The use of UML for
embedded systems design in general [16] and SDR design is
an example [17]. Secondly, we argue that the temptation of
inventing a completely brand new design flow for SDR must
be avoided. SDR design must be based on other design
technologies in order to take benefit from their advances. In
that sense, we defend the following approach for SDR design.
An SDR design flow has to give the opportunity to integrate
processing elements (let us also call them IPs here, for
Intellectual Property) made by other specialized designers or
tools. Then SDR design may be seen as a kind of IPs
integration process. This relaxes the SDR design flow from the
very intimate processing elements or IPs design and gives a
higher level position to the designer thanks to a component-based approach. This makes it possible to consider signal processing
elements as black boxes with very general characteristics, such
as for instance execution time, mean power consumption,
memory use, etc., and not as a succession of atomic
operations, or a set of gates. Another advantage, if not the
most important is that it permits to keep the optimality of each
IP design thanks to dedicated tools for each domain, as a
synthesizer for a FPGA, or a compiler for a DSP. It looks
unrealistic to claim to make a better tool than the specialists of
each of these domains. Moreover, this opens the possibility to
benefit from third party developers for IPs. This announces the
proliferation of IPs integrators on generic platforms in the
future. The re-use of IP, moreover, is a guarantee of reliability.
It also gives the opportunity to benefit from the use of IP, the
content of which is protected.
F. Embedded constraints
Embedded constraints can be the following:
- hard real-time,
- memory limitation constraints,
- power consumption,
- cost,
- size.
In the SDR context, the first hurdle to overcome is often to solve
hard real-time data flow constraints. It often implies to
consider memory optimization as well. These two points are
the state-of-the art of rapid prototyping approach, and are of
course also to be considered in commercial equipment design.
Real-time guarantee is mandatory. It may consequently not be
preferable to rely on real-time operating systems (RTOS) in
that context, compared to a static scheduling of operations.
Power consumption, as well as cost and size are not
considered in a prototyping approach but are for commercial
systems, and consequently are not in the scope of the approach
proposed in the paper. A long term goal is to integrate these
considerations from the very beginning of the design phase,
which is currently done manually. Research studies on power
consumption are numerous [2], regarding the importance of
the topic. Some are based on a prediction [18] or a measure of
the power [19]. Attempts to help taking into consideration
several of these parameters at the very early steps of a design
flow have been proposed [20] but without providing an
integrated resolution method yet.
G. Signal-processing integrated simulation
Another required feature of an SDR design flow is the
capability to simulate the system in order to check its
functional and non functional behavior. A major point to be
stressed is that there is almost no interest for such a simulation
if the conformity between the simulated version and the final
version installed in the real system is not guaranteed. If not,
the checking will have to be done again in the platform.
An alternative is to give the possibility to implement very
4
fast the system on the hardware platform and directly run the
real system to check it in-situ.
H. HW abstraction
Abstraction layers may have several roles in an SDR
system. At design time, it may be seen as a technique that
enables to abstract the signal processing IPs from the hardware
target, as a Java code that is planned to run on a Java virtual
machine. This is not very popular for SDR as it may decrease
processing performance too much. Another approach is to take
benefit from a higher level design approach to hide some
details of the hardware implementation. This may be achieved
through the use of only few features for the characterization of
IPs execution on this hardware. This permits to speed-up
design phase and direct implementation on the platform, where
the very intimate measurement may be done with exact results.
Another abstraction is acting at run-time. This may be done
thanks to a layer of middleware. The ultimate proposal in that
sense is the Software Communication Architecture (SCA)
selected by the US Department of Defense (DoD) in the JTRS
(Joint Tactical Radio System) program [21]. A CORBA
(Common Object Request Broker Architecture) software bus is
proposed in the SCA to support the abstraction of the
hardware for the software. This is a major source of concern
for the radio design community. Solutions trying to go round
the full-CORBA approach while supporting some SCA
compatibility are numerous [22]. They mostly consist, for real-time signal processing, in bypassing CORBA which should
definitely be restricted to reconfiguration management. As
explained earlier, reconfiguration management is not in the
scope of the current paper but has to be taken into account, as
attempted in [8].
I. A design approach larger than SDR domain
The list of properties for SDR design approach has many
commonalities with other application fields. This means that
solutions targeting SDR design may also be applicable to other
domains. In the context of image processing in particular, all
listed characteristics are valid, even flexibility. This is
particularly relevant regarding the very last versions of video
coding schemes from MPEG standardization groups. MPEG-4
for instance targets a large scale of coding/decoding schemes
which implies flexibility by nature. This requirement is also
stressed at its maximum in last RVC (Reconfigurable Video
Coding) where reconfigurability is a goal itself [23]. Another
example is the need to solve massively heterogeneous multi-processing issues of the networking infrastructure
implementing transcoding of video and audio formats in order
to broadcast multimedia contents on different media types
(from high definition HD TV to handheld DVB-H).
III. CURRENT AND INNOVATIVE SDR DESIGN APPROACHES
A. Today: no existing solution
Regarding the design flow requirements listed in the
previous section, there currently exists no integrated solution
for the design of SDR systems and it may be expected there
will never be. Consequently, a set of solutions have to be
addressed separately and combined. We would rather expect
the use of a new tool combined with already existing tools
dedicated to sub-parts of the design. We propose here to give a
summary of the potential solutions that can be expected, on the
basis of the state-of-the-art in terms of already existing
languages and commercial proposals.
B. Co-design languages
If we consider the SDR design issue as a co-design issue, let
us consider first the languages that have been created these last
10 years for that purpose. We may refer to SystemC [25],
HandelC from Celoxica, and CatapultC from Mentor Graphics
to speak about the most used ones. Initial goal was to provide
designers with a completely integrated flow for the joint design
of the SW, namely the coding of the processors, and the HW,
namely the coding of the FPGAs. By integrated it means using
a unique language that would be compiled for the SW part of
the system and synthesized for the HW part after a joint
edition, simulation, debug and validation process. However,
SW and HW worlds are intrinsically different if not opposite,
which makes it very challenging. Another idea was to facilitate
the HW design while getting rid of the VHDL coding and
turning it to C-like. The previously cited languages proposed a
C-like programming method for HW while creating C++
libraries expressing the needs of HW design, such as binary
and vector data types, or the possibility to express parallelism.
But not all C++ code can be converted to HW, which leads to
many synthesis failures. The restrictions are so
severe that, with slight exaggeration, it amounts to writing VHDL in
C++. Moreover, this approach is far from providing the most
efficient FPGA design in terms of processing speed, surface
occupation and power consumption. This finally makes it
suitable for co-simulation, but as the HW part has to be
manually redone, this completely breaks design flow as the
validation at high level is not guaranteed. We must insist here
on the following point: a 95% synthesis of the SystemC code
for a HW target is not enough if it takes a huge amount of
energy to solve the 5% remaining issues, which is often the
case. Finally, these co-design languages are often restricted to
transfer level simulations at their best capability.
C. MathWorks
A very popular method for fast prototyping is proposed by
Mathworks, based on Simulink. The first solution was
proposed in the early 2000s in association with LYRtech, an
SDR platform provider [26] and has been generalized to many
other providers since. It consists in providing a direct bridge
between a Simulink environment for signal processing
5
simulation and the execution on a heterogeneous hardware
platform composed of DSPs and FPGAs. The link from
Simulink to DSPs is obtained through RealTime Workshop
from MathWorks. Concerning the HW side, Xilinx is
providing a set of FPGA IPs which are guaranteed cycle
accurate equivalent to IPs provided in Simulink. It provides an
artificial translation made from Simulink to the FPGA. This
faces the following restrictions. It is firstly dependent on the
existence of the adequate APIs (Application Programming
Interface) for each platform. However if this solution is
generalized, all platform providers will make the effort.
Secondly and most important, HW domain is restricted to
Xilinx, and even a sub-set of Xilinx since only boxes having
the dual Simulink/VHDL IPs are available. Note that it does
not prevent any designer from making their own IP with this
technique. The guarantee of the equivalence is under the
responsibility of each company then.
Despite the above mentioned concern, MathWorks is
offering the most effective solution to SDR designer in the
short-term. Just a lack of a higher modeling introductory work
at specification level in the design flow may be regretted.
However, it really permits to implement a heterogeneous
design from functional simulation in Simulink environment to
the implementation. It is in this sense a signal processing based
approach.
D. High-level design
At the other extreme, and with longer term objectives, we
may refer to computer-science oriented approaches based on
high-level modeling, such as UML (Unified Modeling
Language) of the OMG (Object Management group). UML is
another way to interpret co-design. As UML indeed is
dedicated to help in the modeling of any kind of system, even
outside engineering domain, then why not for mixed SW and
HW? In addition, systems in general are becoming more and
more complex. Hardware and software designers (in their usual
sense here and not in the sense of differentiating processors
from FPGA designers) have to cooperate in larger and larger
projects. This is provoking a need for a global system design
methodology. The engineering answer has been MDE for
Model Driven Engineering, and the methodology, MDA for
Model Driven Architecture [29]. This approach is mainly
based on UML concepts. Another feature is the two steps PIM
and PSM. PIM stands for Platform Independent Model and
corresponds to a modeling of the application independently
from any implementation consideration. Then PSM for
Platform Specific Model adds new characteristics to the model
in order to take into account the HW platform of execution.
MDA, which was introduced in 2000 by the OMG, is now a
quite successful concept. An attempt to integrate SDR design
in a UML methodology has been done in A3S project [30][29]
for the modeling and prediction of non functional
characteristics of such systems. A metamodel for SDR has
been derived for the generation of a UML profile named “A3S
profile” [20]. A profile is a way to extend UML concepts for a
specific domain. Practically, it consists in customizing a UML
environment to a specific domain of application (and getting
rid of all the unnecessary concepts). The OMG has already
standardized several metamodels for embedded real-time
systems. The last is MARTE [16]. New researches on SDR are
inspired from MARTE, such as Mopcom [27] that aims at
defining within two years a design flow to automatically
generate VHDL code from high-level UML-based modeling
[28].
E. Other approaches
Other design approaches for SDR have mainly oriented their
solution towards a specific hardware implementation target,
while offering the inherent flexibility of SDR design. One
solution is to propose a processor customized for SDR.
QuickSilver Inc. has designed a hardware chip and associated
software development tools. The Adapt2000 ACM System
Platform claims to integrate in a single IC, the Adapt2400
ACM (Adaptive Computing Machine), the capabilities of
ASICs, DSPs, FPGAs and micro-processors [31]. The
Adapt2400 is comprised of four distinct heterogeneous node
types and a node wrapper. Complementing the Adapt2400
Architecture are the InSpire Node Software Tool Set that
abstract away the complexity of the heterogeneous, multinodal, multi-tasking ACM architecture under a single unified
programming model.
Another choice may be to favor raw performance
(computation speed, power consumption). A promising
approach in this field is the use of systems on chip (SoC) that
gathers several (more or less dedicated) processing units inside
a single chip. The generalization to a high number of units, as
required in SDR systems, leads to networks on chip (NoC). A
NoC can be differentiated from a SoC in the sense that the
number of units becomes so high that communications
between the units themselves become an issue and need their
own processing units. A very advanced work in the domain of
SDR is the FAUST (Flexible Architecture of Unified System
for Telecommunication) chip from CEA [32]. It has been
originally designed to support potential 4G candidates based
on MC-CDMA modulation schemes [33]. The flexibility was a
necessity for supporting several ranges of parameters in order
to evaluate several possibilities and to incorporate flexible
schemes for 4G. This included real-time reconfiguration
capabilities in a certain set of configurations limited by the
parameters possibilities of each unit. Then it turned in a
second step towards a real platform for the support of multi-standard SDR systems. CEA designed a new version of
FAUST chip named MAGALI (Multi-Applications Globally
Asynchronous Low-power Integrated circuit), which
incorporates new and more diverse processing units. This
allows MAGALI to support a wide range of current and
future standards, such as WiFi, WiMax, etc. with MIMO
support. Note that even if the set of possibilities is very high, it
is limited by construction. It is in particular specifically
dedicated for radio processing and could not address other
domains. It also includes advanced capabilities in terms of
power consumption savings, which is a definitely advanced
6
feature in the field of SDR.
Even if this seems contradictory with choosing a HW target,
Vanu Inc. made the choice to stay generic a hundred percent in
the sense that the hardware target they selected is “the” generic
processor. It is based on MIT research from the mid-1990s
[34] for GSM base stations [35]. The main idea is to take
benefit from the natural technological improvement of GPPs
with Moore’s Law and keep the advantage of existing high-level programming tools supported in a GPP environment. The
portability is then implicit by essence as it is the exact
application of the PC concept for the radio. Vanu Inc. has been
the first company to be certified for the manufacturing of GSM
SDR base stations in the US in 2004. But we may wonder why
this technique is not so popular and why Vanu Inc. does not
have a stronger market position. This reveals that this is
probably not yet mature for the infrastructure. It will
consequently be even worse for terminals as the main
drawback of GPPs is the power consumption.
Let us also cite the tools that are well suited to the
optimized design of FPGA IPs and that may be
complementary to the previous, as for example: LISAtek [36],
and GAUT [37]. LISAtek aims at providing integrated tools
for the design of ASIPs (Application Specific Instruction Set
Processor) in order to obtain an optimized hardware processor
architecture for a dedicated set of processing operations and
the associated programming tools (debugger, compiler). This
could be used for the design of an SDR SoC for instance as
experimented in [38]. GAUT enables to reuse an IP
algorithmic specification written in C/C++ and to synthesize it
into an equivalent VHDL RTL specification. The potentially
pipelined architecture which is generated allows to explore the
design space for FPGAs or ASICs through a trade-off between
processing speed, power consumption and area. This can be a
complementary tool for the integration of processing units in
previous solutions.
From the system point of view, another tool worth
considering for the design of embedded equipments is CoFluent
Studio from CoFluent Design [39]. The tool permits to
manually explore a heterogeneous design space with an
extreme intimacy in order to anticipate and orient design
possibilities. This is a product-oriented approach that requires
quite much time and expertise. Moreover, CoFluent does not
provide automatic code generation facilities. The exploration
stays at the level of a virtual platform. We mostly favor in
this paper rapid-prototyping-oriented solutions that help a
signal processing engineer to quickly implement an SDR system
with automated steps.
The future will tell whether any of these approaches, more or less
dedicated to SDR, is the right one. Maybe the market
of SDR will be shared between several of them. The design
methodology proposed in the next section is less dedicated to
SDR than most of those described here and could also be
worth for other embedded real-time prototyping design
perspectives.
IV. A REALISTIC DESIGN FLOW FOR SDR OPEN
ARCHITECTURES
The term realistic is here to explain that with the current
state-of-the-art, a perfect solution for fully integrated co-design
is not available and not even foreseen. We mean here by open
architectures that we consider the solutions based on COTS
programmable and reconfigurable components. We exclude
from the study the approaches based on pre-oriented hardware,
which reduces its range to only a subset of SDR capabilities in
terms of flexibility. What is proposed here can be used for any
heterogeneous real-time data-flow oriented embedded design.
However, the concern of this paper is SDR, so we must keep
in mind that SDR is not only data-flow oriented. The approach
has to be compatible with the addition of a reconfiguration
management architecture later, more control-oriented by
essence. Moreover, SDR design must not be restricted to radio
design. It should be seen at a larger scale, including other
layers of the OSI model. SDR design is also a question of
cross-layer design. This typically concerns in a terminal, a
joint approach for radio and image processing for instance
[40].
A. Ideal design flow for SDR
Fig. 3 schematically represents what could be the ideal SDR
design flow. It would first consist in a high level modelling
phase. This would permit to take advantage of mature
modelling techniques which permit to the different actors of
the design, respectively HW, SW and signal processing
designers, to commonly refine the very early design choices in
a common environment from the system specifications.
First step consists in making a simulation and functional
validation of each IP. The hardware target that is planned to
run the IPs would be here completely transparent. Ideally, the
same language would be used to program a SW (dedicated to
run in a processor) or a HW (dedicated to run on a FPGA) IP.
The system functional validation is directly obtained by
combining the validated IPs of previous step. In parallel to this
step, non functional requirements, as well as platform features
are derived. Platform has to be considered here as the
composition of devices and their associated low level software
such as board support package, RTOS, etc.
Then all the information is merged to permit an automatic
HW/SW partitioning for heterogeneous multi-processing.
Prediction figures are achievable, such as the application
execution time, so that other HW/SW matching could be tried
if the constraints are not respected. This permits to dimension
the HW platform at early stage of the design flow. Let us just
note that this is far from current reality. The partitioning
guarantees the functional accuracy of the multi-processing
version of the application compared to the previously
simulated mono-processing version. Of course, in that ideal
world, the automatic scheduling is also done. The
communication code generation is derived directly from the
scheduling. IPs automatic code generation is obtained,
whatever their nature, HW or SW. The transformation
guarantees the equivalence with the previous model so that it
7
permits to avoid repeating the simulation and verification
procedure already done earlier.
A co-simulation tool permits to definitively validate the
heterogeneous system. Finally, the implementation on the
platform is straightforward as the ideal design flow takes into account every
aspect of the implementation, whatever its level.
!
Fig. 3 – Ideal SDR design flow
B. A realistic design flow for SDR
A realistic design flow for SDR design is proposed in Fig. 4.
It aims to use, to their maximum capabilities, a set of already
existing commercial and/or academic technologies.
Two main goals are achieved here. First goal is to avoid the
reprogramming of the same function several times. This is
what happens usually for heterogeneous design. No less than 6
re-coding steps may be done for certain sub-parts of a
heterogeneous system, as for a HW IP:
- Matlab functional validation,
- floating point C functional validation,
- fixed-point C validation,
- SystemC,
- cycle-accurate SystemC,
- VHDL.
In addition to the extra time necessary to code again the
same functionality, this approach is very error-prone and each
step requires repeating the debug and validation process.
Second goal consists in automating all the implementation
dependent actions, and keep the multi-processing functional
accuracy from separately developed and validated IPs. IPs
typical abstraction level is C language or VHDL. This could
even be an executable code or a bitstream coming from a third
party keeping the secrecy of its code. This means that the IPs
code for the selected HW target is available or can be obtained
with dedicated tools (compiler for DSP and synthesizer, place
and route for FPGA), so that non functional characteristics
may be extracted, such as for instance: the programming code
and data code size in a DSP, the number of gates for a FPGA,
the execution speed, etc.
As previously stated, the IPs design is voluntarily set out of
this flow. Note that we also definitely consider here that there
is no efficient solution for automatic HW/SW partitioning.
This must be kept at the designer choice, based on his
experience.
Then HW (respectively FPGAs) and SW (respectively
processors) worlds have to be treated separately, each of them
having its own way of managing multi-processing for
automatic partitioning and scheduling. But strong equivalent
principles must be applied for both, which permits to keep the
global cohesion of the design for an SDR approach. The main one
is the asynchronism between operations. It is natural on
processors since processing speed is correlated to device clock
and not data rhythm. It can be used in FPGAs using a GALS
(Globally Asynchronous, Locally Synchronous) design
methodology for HW IPs [41]. This gives many advantages
which are requirements of SDR for:
- re-usability: an IP may be used in different designs at
different clock rates,
- portability: from one HW device to another,
- managing reconfiguration in a future step,
- power consumption considerations which is the
origin of GALS.
Nevertheless automatic code generation for communications
between the two worlds has to be possible at least.
The methodology should also bring some automatic
partitioning and scheduling concerning the SW side only. It
should be able to provide a multi-processing version which is
functionally certified equivalent to the mono-processing
version of the application used for validation.
8
"
Fig. 4 – Realistic SDR design flow today
How can this be done? This is explained in section V.
C. How to support reconfiguration in a future step?
Reconfigurability is intrinsically provided within this
methodology by two complementary means:
- the choice of generic hardware components for the
platform, such as GPPs, DSPs, FPGAs,
- by construction of the software application through a
component-based approach for both SW (processors)
and HW (FPGAs) processing elements.
This is the necessary base that will permit in a further step
to integrate a reconfiguration management architecture.
We have derived a reconfiguration management architecture
that is particularly suited to this design methodology [6][7]. It
is out of the scope of this paper to describe it, but we point out
that it has been successfully developed and experimented
through several prototyping showcases. One main feature of an
adequate reconfiguration management architecture is to
perform reconfiguration operations in a very short time in
order to limit as much as possible the overhead compared to
the signal processing duration.
It is important to point out that we deliberately disjoined the
reconfiguration management design on the one hand and the
SDR signal processing on the other hand, while keeping all the
necessary interconnections between them.
It does not seem mature enough to us to merge both
approaches at the same time currently. That is the reason why
the design methodology of this paper does not include but
supports the coherence with a reconfiguration management
architecture. However we are attempting to pursue this final
goal, such as for the FPGA sub-part in the Mopcom
collaborative project [27][28].
In addition, it has been highlighted that a particular effort
has been made to completely master each part of the design.
We insist that a key point of making
reconfiguration efficient is to integrate reconfiguration in the processing
elements design itself. From the formal point of view, we have
been investigating parameterization techniques for several
years. It consists in designing processing elements in such a
way that they may be reconfigurable fast enough through the
use of parameters [42]. An extension is to plan the multi-standard equipment design with a major use of this approach
till the use of common operators, such as explained in
[43][44][45].
From the experimentation point of view, we have also
shown that the reconfiguration management may be merged, at
least partially, with the signal processing in order to offer more
flexibility. That is why we propose a hierarchical and
distributed management architecture in [46]. It is particularly
obvious in the particular case of partial reconfiguration of
FPGAs we have investigated in [7][11].
The transition from one configuration to another can be
extracted from the difference between two designs generated
rapidly by the methodology proposed in this paper. Some
overhead has to be planned for the transition from one design
to another in terms of processing time.
D. What already existing solutions do and do not do
The combination of Simulink with platforms such as
LYRTech seems to us the closest solution. But there is an
eliminating limit: designing one's own IP is a necessity to control
the system in such a sensitive implementation domain. With the
Simulink approach, the designer depends on Xilinx IPs to fully
exploit the design flow. It is also possible to add your
own IP, but it necessitates coding it twice: for Simulink and in
VHDL. Then you break one of the goals, which is to avoid re-coding.
High level design approaches based on UML offer
promising perspectives to formalize the top of the design flow.
There is a need for a bridge between the specification and the
functional validation. A3S approach and A3S metamodel [17]
partly answered this need, but without generating the result of
the architectural study in code for the validation and the
implementation. Mopcom project is trying to fill this gap for
the HW side while using automatic transformations between
three layers of metamodels [28]. Each of these layers is taking
into account, with an increasing level of accuracy, the details
of the implementation. The goal is to model a system at several
levels of abstractions while keeping advantage at a lower level
of the validation and results already obtained at a higher level.
We do not claim to have explored all the existing
solutions, but we mentioned the most interesting ones. In a
nutshell we can draw the following analysis. There are two
ways of considering the extension of the design flow towards a
fully integrated and automatic design flow for SDR. Either
9
starting from the electronics point of view and follow a
bottom-up way. This is what is chosen by designers that want
to keep real-time as a priority for instance. Or it is also
possible to look at it in a top-down perspective and privilege a
software engineering approach to take full benefit from last
computer science advances and tools.
V. A DESIGN METHODOLOGY FOR ULTRA-FAST SDR
PROTOTYPING BASED ON SYNDEX
We propose here in detail a design methodology for SDR
prototyping respecting Fig. 4 and fulfilling the requirements of
part II. The set of tools we are using for that purpose is
probably not the only possibility. That is why we firstly
presented this methodology in general in section IV.B and a
possible instantiation in the present section. It lets free any
other implementation of this methodology to the designers,
depending on their particular application domain or habits. We
remind also that the granularity level of the constitutive
elements considered in the methodology is at the size of a
signal processing IP, such as a filter, a Viterbi decoder or a
FFT for instance.
A. SynDEx approach
The tool selected for the design flow has been SynDEx [47]
from the INRIA till now. SynDEx stands for Synchronized
Distributed Executives. The role of this tool in the proposed
methodology is to carry out the coarse-grain design steps till the
code generation, mainly the glue communication code between
IPs.
Then specialized tools dedicated to each processing domain
(respectively HW/FPGAs and SW/processors) take over till
the implementation. Major requirement of the design flow is
that no re-writing of the code is necessary between these two
steps in order to keep former step verifications valid.
a)
Major features
Entries for the design flow are two graphs:
- a hardware platform diagram,
- a software application diagram,
In order to speed-up design phase, it is deliberately chosen
to have only a restricted set of parameters taken into account in
the architectural exploration:
- execution time of each IP,
- communication atomic time for each data type,
- communication means features
The goal is to obtain an implementation on the HW platform
as fast as possible in order to deal with realistic constraints
instead of having approximate simulations of these constraints
in the design flow itself.
Partitioning optimization of SynDEx is performed using
graph theory. The SW application graph is modeled by an
extended Data Flow Graph (DFG) which is an oriented hyper-graph. Each vertex corresponds to an algorithm operation or a
processing element that is activated by the fullness of its input
buffer and each edge represents a data transfer between
operations. An example of SynDEx software application graph
IP_1
IP_2
o
i
IP_3
o
i
IP_6
o
i_1
i_2
Operation
i_3
IP_4
i
Data dependency
between 2
operations
o
IP_5
i
o
Fig. 5 – Example of SynDEx software application graph
Moreover, the model includes hierarchy, delay, conditioning
(if then else) and factorization (for loop) to express the
potential parallelism. Factorization, which is associated with a
repetition factor, is used to repeat operations and requires
additional specific vertices in the DFG:
1) Fork/Join vertices: the “Fork” vertex explodes each
element of the data it receives for each parallel repetition of
the consumer operation whereas the “Join” vertex builds the
data it sends by concatenating each separated element
produced by each repetition of the producer operation.
2) Iterate vertex: this vertex aims to sequentially duplicate a
producer/consumer operation where outputs of the current
operation can be the input of the next one if data names are
similar. More details can be found in [49].
The hardware architecture is modeled by a non-oriented
hyper-graph where each vertex is a processor (hardware
component) and each hyper-edge represents a communication
medium, as shown in Fig. 6. In this model, a processor consists
of one operator and as many communicators as there are
connected media. An operator executes operations which are a
part of the algorithm and a communicator executes a
communication operation when a data transfer is required. In
this way, multi-component architecture can be represented by a
network of Finite State Machines (FSM) interconnected with
communication media (FIFO, shared memories etc.).
PC1 (lib_archi/pentiumOS) (main)
TCP1
PCIsam_0
PCIram_0
TCP1 (lib_archi/TCP)
PC2 (lib_archi/pentiumOS)
TCP1
PCIsam_0
PCIram_0
Fig. 6 – Example of SynDEx hardware platform graph
The aim of this tool is to find the best matching between an
algorithm specifying the application to be run and a multi-component architecture. In addition to this, real-time and
embedding constraints must be satisfied. This methodology is
based on a graph theory to model the software application and
the hardware architecture. Both the software and the hardware
are described by distinct graphs. SynDEx transforms those two
graphs with graph transformations in order to generate an
optimized code implementation.
An efficient implementation graph is obtained by
optimizations by simultaneously realizing distribution and
scheduling, while trying to minimize the execution time.
There are a large number of possible implementations. The
optimization problem aims to select the most efficient one
between them (best latency). The latency is the total execution
time of the DFG on a given HW architecture between the first
scheduled operation and the last one. Moreover, the problem
of distribution and scheduling in case of HW multi-component
is known to be NP-hard (an exhaustive search over all the
possible implementations is inconceivable). This is why heuristics
are used to match the best approximation of the optimal
solution (greedy and genetic algorithm). The current heuristic
attempts to minimize the total execution time of the algorithm
running on the multi-component architecture. Moreover, since
the Synchronized Distributed Executives (SynDEx) are
automatically generated and safe, this consequently eliminates
part of the tests and low-level hand-coding, and shortens the
development lifecycle as well.
SynDEx provides a timing graph, as in Fig. 7, which
includes simulation results of the distributed application and
thus enables SynDEx to be used as a virtual prototyping tool.
Time
is given in Fig. 5.
10
Fig. 7 – Example of SynDEx partitioning and scheduling
SynDEx is also providing a static scheduling, which is of
major importance for hard real-time requirements. It gives the
guarantee of an execution within a restricted given latency
period. SDR equipment will have very strong real-time
constraints to respect. Bringing SW in the radio design must
not be followed by customer’s suspicion or even worse
rejection. It has already been seen in the mobile phone area
with collapse of WAP (Wireless Application Protocol) that a
technological advance badly introduced on the market may be
very harmful.
b)
SynDEx tool
SynDEx is a CAD tool aiming originally at parallelizing the
processing of an application on a multi-processor network [51].
It provides a multi-processing version of an application that is
functionally accurate as compared to a mono-processor initial
version. SynDEx performs an optimized partitioning and
scheduling of an application on a multi-processor architecture
made of GPPs. Typically communication media is TCP/IP in
this context. This can be easily implemented in a platform,
instead of a network perspective, by varying communication
means between the processors, such as FIFO, dual-port
memory, PCI bus, etc. Moreover, SynDEx has been extended
towards the embedded reality and in particular in terms of
memory optimization [49] [50]. In this sense, SynDEx
particularly answers the SDR issues in the restricted SW
domain, in other words if the SDR equipment is only made of
processors. SynDEx indeed matches the ideal software radio
design. Nevertheless, other SynDEx intrinsic features are
valuable beyond the purely SW application domain. After the
multi-processor matching, SynDEx generates a scheduled
version of the application for each of the processors involved
in the platform. This code is generic and not dedicated to any
implementation. It is written in M4 macro-code. Each processing
node of the graph consequently has at its disposal a generic
expression of the scheduling of operations and communication
synchronisms that can be translated in any programming
language (assembly, C, VHDL, etc.). The translation is
operated by GNU M4 macro-translator, thanks to the use of
translation libraries, considered as dictionaries. We have been
developing translation libraries for many different embedded
targets, as shown in TABLE I, added to the already existing
ones of TABLE II.
TABLE I
LIBRARIES DEVELOPED FOR THE SUPPORT OF MULTI-TARGETS, MULTI-PLATFORMS DEVELOPED BY IETR/INSA
Media Type
Communication
Platforms
Processors
media
Sundance
TMS320C64x
SDB
FIFO/SAM
SMT310q
(Sundance)
Sundance
SHB
TMS320C62x
FIFO/SAM
SMT320
(Sundance)
Sundance
DM642
Comport
FIFO/SAM
SMT361
(Sundance)
x86 (windows)
FIFO/RAM
Sundance
PCI_RAM
SMT395
(Sundance)
Sundance
PCI_SAM
FPGA
FIFO/SAM
SMT365
(Sundance)
Sundance SMT
TCP (windows,
FIFO/SAM
319
C62x, C64x)
(Framegrabber)
Pentek p4292
Converter ADC
FIFO/SAM
(Pentek)
Vitec
Converter DAC
FIFO/SAM
VP3-PMC
(Pentek)
Sundance
VP3_RAM
FIFO/RAM
SMT348
(Vitec)
FIFO/SAM
Bi-FIFO
(Pentek)
TABLE II
ALREADY EXISTING LIBRARIES
Processors
Intel x86 (linux)
ADSP21060
TMS320C40
MC68332
MPC555
Communication
media
TCP (linux)
RS232
CAN
Media type
FIFO/SAM
FIFO/SAM
FIFO/SAM
Libraries for platforms, processors and communication
media are listed in the two tables. Certain libraries depend
only upon the language (C, VHDL), others depend on the
11
processor nature, and last on the other devices, typically for
communications (FIFO, memory, bus, etc.).
Since SynDEx is dedicated to the processor world (called
SW in this paper), it extracts parallel operations from the
application graph to map them on different processors
(hardware components) but makes sequential operations of IPs
on each of the processors of the HW graph. This consequently
does not match with the HW (FPGA) reality, which benefits
from parallel execution. However all the code generation and
scheduling of the SW world on the one hand, and between the
HW and SW world on the other hand can be kept a hundred
percent. The content of the HW domain has only to be
considered separately, with common principles shared with the
SW side.
B. Heterogeneous processing support
This methodology is originally dedicated to multi-processor
(of any kind: DSP, GPP, µC, etc.) architectures programming.
It also efficiently supports devices which are controlled by
processors, such as analog or digital ASICs. And in order to
adapt it to SDR design, it is mandatory to extend it to
reconfigurable hardware computing, namely FPGA.
HW implication has indeed to be considered outside the
partitioning optimization of SynDEx if we want to keep the
HW efficiency (of parallelism) since processor world is
sequential. That is why we mentioned earlier that HW/SW
partitioning was not supported by this methodology. The risk
to deal with HW and SW approaches in a single automatic
methodology is to decrease one side performance at the benefit
of the other side. Optimization purposes may be indeed the
opposite between HW and SW. It can be considered as a
granularity issue between the description of an efficient
algorithm written in SW (coarse grain), respectively C
language, or HW (fine grain) respectively VHDL.
Then we propose a methodology that integrates the HW
design to the SW and system design methodology presented
till now, while the HW/SW partitioning has been manually
done. The characteristics that should be kept in the HW side to
be fully compatible with the SW side of the design are the
following, mainly based on a component-based approach:
- HW portability from one design to another, whatever
the device, the clock, etc.
- HW to SW IPs migration,
- the support of IPs made of gates as well as IPs
running on embedded processor cores inside FPGAs,
- the support of ASIC.
a)
FPGA support
A reformulation of the design approach has to be made here.
The way to solve the issue of interaction between an
intrinsically sequential and a highly parallel approach lies in the
answer to the following question: How to reformulate the data-flow approach of the sequential world of processors for the
parallel world of reconfigurable hardware, without losing
parallel HW highly efficient processing speed? Let us just
recall that one major goal in SDR is to add, delete, replace a
processing element with no disturbance for the other
processing elements of the radio design which are inside or
outside the FPGA.
The first part of the solution is solved by the IP approach,
which guarantees the parallelism inside the IPs since it has
been designed with usual HW tools. In other words, SynDEx
permits to keep parallelism of execution inside an HW IP with
this approach, whereas SynDEx is made to execute sequential
operations inside a single processor, and only extract
parallelism between several IPs on a multi-processing
architecture.
The second part of the solution is using GALS, as already
addressed in section IV.B. That permits to provide also
parallelism through pipeline. GALS permits to generalize to
the HW world the asynchronism of IPs processing compared
to the data flow rhythm. Input buffers, associated with
adequate hand shaking signals of Fig. 7 permit to launch and
stop the IPs operation depending on the presence (or not) of
data at the input of the IP, or at the place available in the
buffers at its output.
'*#, '*+
'*+
'*+
'()
'()
'()
&
#$%
0
!
/
#
/
(
&!
/
./
/
*
2
.1
1
-
!
2
.2
2
2
4
!
3
12
1
.3
/
Fig. 8 – Data flow speed adaptation in a GALS if f1> f2> f3
b)
Specific design use cases with FPGA
In the specific case of a unique IP running on a FPGA, the
methodology is one hundred percent useful and automatic, as
there is no sequential issue for one IP. The FPGA is then a
HW accelerator. In this particular context, an automatic
HW/SW partitioning is even possible as illustrated in Fig. 10.
Let us consider the example of the software application in Fig.
9. If it has to be decided whether IP4 should be inside a FPGA
or a DSP, the condition is to have both versions of code with
their corresponding execution time for both targets. Then the
methodology is able to deal with such a context and may
optimize and generate the full code for both DSP and FPGA
with the scheduling obtained on Fig. 10.
IP_1
IP_2
IP_3
IP_6
Fig. 7 –GALS schematic encapsulation
GALS has three main interesting features in our
methodology, as illustrated in Fig. 8:
- adapt the average frequency of operation of an IP to
the other IPs frequency of the chain (Fig. 8b for IP
A),
- this permits to change an IP of clock domain (Fig.
8c),
- this permits to block an IP from time to time, as for a
reconfiguration operation for instance (Fig. 8d).
o
i
o
i
o
i_1
i_2
i_3
IP_4
i
o
IP_5
i
o
Fig. 9 – Software application graph example
The methodology takes the decision to instantiate IP_4 on
the FPGA instead of the DSP since executing IP_4 in parallel
with IP_3 and IP_5 permits to save time: IP_4 execution time and
communication overheads are completely masked during IP_5
and IP_3 execution. The FPGA plays here the role of a co-processor.
13
adjustment of these parameters. Note that in certain context, a
DSP may change registers inside a FPGA, from which they
configure the ASIC.
#
"!
&
#!
Fig. 10 – Fully automatic scheduling obtained with one IP on the FPGA
Time
A sub-optimal but entirely automatic way of using the
proposed methodology is also possible with several IPs on a
FPGA. As shown in Fig. 11, it consists in separating a FPGA
in as many HW components as there are IPs. If IP3 and IP4 of
Fig. 9 are planned to be implemented on a single FPGA, we
may define two Virtual FPGAs, assigning IP3 on virtual
FPGA1 and IP4 on virtual FPGA2. Fig. 11 shows how the
methodology is able to parallelize IP3 and IP4 and take it into
account in the global scheduling of the application. Merging
all the generated VHDL code for each HW components has
just to be done hereafter.
Fig. 11 – Scheduling graph with a DSP and one FPGA for each IP
c)
Digital and analog ASIC support
SDR design may not only involve fully programmable or
reconfigurable devices, it is necessary to support also specific
devices such as ASIC for instance. Let us cite in an SDR
design typical ASIC that may be met: digital down and up
converters (DDC and DUC), analog to digital and digital to
analog converters (ADC and DAC), amplifiers, analog filters,
antenna, etc. such as illustrated in Fig. 12. These circuits have
to be inserted in the design flow for two reasons. They may be
first the destination or the source of data to be processed.
Secondly, they may be parameterizable and need to be
configured through primitive functions activated by a
processor for instance. Consequently the processor has to
contain the code necessary to perform the initialization and
#!
!
!
#
#$
%
!
!
#
" !
#
#$
#'
#
#$
Time
#
#!
#
% !
!
#
#$
#
#
! ##!
#
#
#!
Fig. 12 – SynDEx hardware graph with ASIC (DDC, DUC, ADC, DAC)
C. A non functional time-based approach
The set of non functional characteristics to be taken into
account would ideally be as many as possible, but to keep it
feasible, it is restricted here to time. This means that each IP
has only to be characterized in time in order to be integrated in
the design flow. Time includes both processing and
communication time. Either it is extracted from measurement
or it can be estimated. Then SynDEx performs the global
application timing optimization by mapping the IPs on the
hardware architecture. The precision of the global design
prediction depends on the accuracy on each IP. The
partitioning algorithm used in SynDEx is a greedy algorithm,
which provides a good compromise between partitioning
execution time and quality result. Another approach has also
been done based on genetic algorithm to refine the SynDEx
greedy algorithm solution [49], which provides the same
functional results. When the genetic algorithm is used, it optimizes the cost
function and decreases the latency, in some cases by 50%.
The price is a computation time many thousands of times longer
than that of a solution obtained by the SynDEx greedy algorithm.
Moreover, added to time some memory consideration may
be taken into account and considerably diminish default
memory allocation done by SynDEx [49].
D. Platform abstraction
This methodology implicitly provides on the one hand an
abstraction of the hardware platform to the software
processing elements by using already existing IPs. For the
communications on the other hand, SynDEx generates a
generic code which lets the entire possibility to the designer to
translate it into whatever language is desired, as explained in V.A.
The automatic partitioning is maximum in case of multi-processing on the SW side (processors) using SynDEx, and on
the HW side (FPGAs) using dedicated hardware tools such as
GAUT for instance. Only the border between HW and SW has
to be manually decided, which is the current limit of the
HW/SW co-design issue.
This abstraction permits ease in terms of portability on new
processing units or new platforms. It relies on the use of
libraries using the appropriate drivers for each platform.
An IP can be moved from SW to HW if its code is available
for both targets, in C and VHDL for example. It permits design
adaptation by a simple click:
- migrate a processing element from a processor to a
FPGA and vice-versa,
- add a processing element in a data-flow graph
whatever it is on a processor, a FPGA or an ASIC,
- change of platform,
- migrate a processing element from a gate-oriented
implementation
to
a
processor-oriented
implementation on a processor core inside a FPGA,
- etc.
E. Sum-up of methodology principles
The proposed methodology is based on SynDEx tool in
association with several concepts, such as the component-based approach. This paragraph aims at recalling its
advantages and show how it follows requirements of part II for
SDR design as well as the realistic design flow exposed in part
IV.B.
SynDEx provides an overall execution time prediction of
the application in a multi-processing platform with
possibilities towards heterogeneous design. At least some
contexts of FPGA design are fully integrated in the
methodology, and ASICs are also included in the design.
SynDEx has the advantage to propose an optimized and static
scheduling, which is a guarantee for hard real-time constraints
of SDR. Moreover, the methodology permits the automatic
generation of the code of each processing unit of the platform
including IPs encapsulation and communication glue. As this
code is generic (M4 macro-code), it may be translated in
whatever language, thus enabling the support of a
heterogeneity of processing devices. The re-usability of IPs is
intrinsic to the methodology. All this permits to speed up
designs at an amazing rate.
The reduced set of non-functional parameters is considered
as a positive feature in the sense of ultra-fast prototyping.
SynDEx is a tool that can be used in a couple of days to
generate prototypes based on existing previous work. Students
can use it for small projects. Only a few weeks are needed to
become an expert. It is affordable to student trainees for a
period of several months as soon as a supporting designer is in
the lab. Section VI proves the approach efficiency for many
examples of design scenarios. If he has a hardware platform at
his disposal, a frequent user of this methodology gets into the
habit of performing the simulation, test and validation of the
application on the platform itself.
Note that in the future any other tool providing extended
features concerning the presented goals compared to SynDEx
would be worth to be considered. Moreover, this methodology
could be also expanded at the top of the design flow, towards
higher level design tools based on UML. The input graphs
from SynDEx could easily be generated from previous graphs
defined in UML environment. A bridge between A3S graphs
and SynDEx graphs for instance would be straightforward.
SynDEx would then only take the timing non-functional
parameter from A3S. The other non functional parameters
14
(power consumption, memory and surface considerations)
taken into account in A3S would not be used in the mapping
process itself but would at least have been considered at the
very beginning of the design, which is better than nothing. Of
course, the long-term goal is to really integrate all non-functional
parameters in the complete design flow.
VI. DESIGN EXAMPLES (IMPLEMENTATION)
Whatever the methodology, the necessary effort to develop
an SDR, is on the one hand to code an application to simulate
and validate the correct functionality of the radio, and on the
other hand to code low level software associated to the
hardware components in order to prototype the radio on the
platform. The lack of methodology makes all this repeated
from scratch for each new design. This is what exists currently
in SDR domain.
The methodology proposed in this paper permits to
mutualise the development effort of both:
- signal processing IPs for the radio,
- low level software of hardware architecture.
It is the acceleration reason of the methodology in
combination with a set of concepts particularly adequate for
SDR design and explained earlier. We propose in the
following paragraphs to illustrate the methodology efficiency
in various design scenarios. Note that it will be illustrated that
SDR design is tackled in the large here. This involves also
cross-layer design, in the sense that PHY (radio) layer, as well
as application (video) layer may be merged with the proposed
design approach. This proves also the relevance of the
approach for general heterogeneous embedded design outside
SDR.
A. Starting from scratch
Our first attempt for developing an SDR is as simple as a
broadcast FM receiver. This is a play activity for a hundred
percent software radio implementation since the antenna is
directly connected to the analog to digital converter. The
selected platform is a Pentek P4292 made of four
TMS320C6203 DSPs. All this processing power is much
greater than necessary and is only used to evaluate the
methodology in a multi-processing context. The work firstly
consists in developing the FM receiver data-flow graph with a
component based approach, and the fixed-point C language
code for the content of each IP. This represents from a few
days to a couple of weeks of work maximum, depending on the
previous knowledge of the modulation, the quality of the
receiver (stereo or mono, choice of demodulation algorithm,
etc.). This can be done on a first hardware platform
architecture made of one PC, which is available by default in
SynDEx.
Secondly, low-level software for the HW component
support is generated for the Pentek platform and gathered in
libraries. This concerns all the specific primitives that address
synchronisations and threads for the C6203, as well as the
communications dedicated to the FIFO of the platform. This
may take several weeks, but less than 2 months, including M4
learning.
It is not at this stage that the methodology brings its main
gain, since this preliminary work is always necessary.
However, as the design steps are very well identified and
separated in the methodology, this is already helpful and
permits to save time. The work can also be easily separated
between two designers, one signal processing designer for the
software application (FM) and one for the platform software
support (libraries). Fig. 13 gives an FM receiver application
graph from which the methodology generates the full
software radio FM receiver. The bottom of Fig. 13 represents the
hierarchical view of the top FM Demodulation box.
. *2 ( ) (
'
0
3
DSPs with no additional effort.
B. Digital ASIC implication
Under-sampling techniques are used with a 65 MHz
sampling frequency at the ADC. The tuning is managed by
software that changes the DDC (Digital Down Converter).
These considerations may or may not appear in the HW platform
graph. The digital ASICs in such architectures, and in the
Pentek platform for example, are usually managed by the
DSPs. The DSPs contain initialization code to configure the
parameters of the ASICs, such as ADC and DDC here, but also
DAC (Digital to Analog Converter) and DUC (Digital Up-Converter) in the context of a transmitter. The DSP may also
change these parameters at run-time. ADC and DDC are
represented in a single block in Fig. 14 since the only
information the DSP has to know is the media (BIFO_VIM)
where it has to send data and control towards the 2 ASICs. The
connection between the 2 ASICs is wired after.
(
'5 '3
4
" '
3
'5
3
'
'3
$
0
$
(
(
'2
0
'3
'
'2
/
/
( ) *
*2
(
'
( ) *
'
!
"!
" '
;;
<<
'2
- ( /
/
'2,
'2,
&
2
'2
- ( /
(
-
(
&
2
/
+
(
/
'2,
$(
# & 0 ,
#
0 !
!
Fig. 14 – SynDEx Pentek hardware platform graph
'
-
- ( /
/
/
'
0 10 '2
'
&
( 5$
'2 ' (*
0
0 10 '2,
.
6
6
(
2
(
- ( /
( *
*
8 7$
-9/
- ( /
2
'2
,
'2
(
(
(
2
(
2
- ( /
'2
'2
6
(
( *
*
7$
/
'
"
- '2
/ *2 '2
'
'2
(
(
"
(
(
,
,$(
.
!
!
(
/
15
/
/
,
/
6
(
-
/
( *
*
8 7$
-
-:/
/
-
/
2
&
$(
&
2
2
(
'2,
(
2
&
2
,
*2 '2
2
'2,
&
2
'2,
'2,
$(
Fig. 13 – SynDEx FM receiver application graph
The FM receiver shown here is a complex one which
operates stereo demodulation. It is the result of the
improvement of much simpler previous designs. Only one DSP
is largely enough to run the FM receiver in real-time,
nevertheless the methodology permits to run it on several
C. A new SW application on the previous platform
The Pentek platform, with its 4 C6203, is of course
supposed to support much more demanding radio applications,
such as 3G radio waveforms. We then develop a UMTS FDD
baseband chain implementing DPDCH (data) and DPCCH
(control) channels till the intermediate frequency (IF).
After functional validation in fixed point on a PC
environment (Visual C++ or Borland), the application is then
ported in the embedded multi-DSP environment of the
platform at no cost since libraries were already existing from
FM first experiments. This concretely means that once the SW
application graph is done in SynDEx and validated on a mono-processor PC architecture graph, the designer just needs to take
the hardware platform graph of the Pentek platform previously
developed for the FM application, and map in one click the
UMTS application on the Pentek multi-DSP platform. The
methodology automatically generates from SynDEx and M4
macro-translator the main.c executive file of each DSP,
including IPs launching and synchronizations means. The
designer may even choose how many DSPs are necessary to
run the application in real-time thanks to the overall execution
time prediction provided by SynDEx. Anyway, the designer can directly
check it on the platform itself in real conditions. SynDEx helps
indeed at making this prediction while offering the possibility
to automatically insert timing measurement between all IPs.
Then from a mono-processing run on each potential device
target, measurements give the figures of the execution time of
each IP. Once these figures have been entered in SynDEx, as
well as communication timings, the tool is able to optimize the
mapping for a multi-processor implementation. It provides a
timing prediction and the associated code for each processing
unit. The designer may explore different architectural
possibilities till he is satisfied.
Fig. 15 shows the UMTS transmitter application graph
within SynDEx environment. Several hierarchical blocks are
present in this graph, as well as repetitions. This makes a total
of 130 instances deployed on the hardware architecture for the
execution of one frame. One UMTS frame is composed of 15
slots, the content of which has been detailed in the bottom half
part of Fig. 15.
"
(
'
.
$5 (
(
-#
>
!
.
" '
.
.
"
/ '
/ '
*
;
!
7
.
/ '
.
.
/#
/
/ ' (
. (
little changes could provoke. Let us consider the context of the
repartition of the UMTS processing between a GPP and
several DSPs. In the Pentek platform environment, this means
adding a TCP link between one of the platform DSP and the
host PC processor. SynDEx already has the TCP necessary
library on the PC side. So just developing the equivalent
library on the DSP side is necessary, which only consists in
encapsulating the code provided by Pentek in a SynDEx
library. This takes one day.
Fig. 17 shows the SynDEx graph of the new platform
involving the PC and the DSP which permits now to
automatically provide the code and run the UMTS application
in a new environment made of one Pentium and four TI C6203
processors.
PC (PENTIUM)
TCP
/#
.
Personal Computer
ADC_1 (DAC)
.
VIM
.
.
/
VIM_1 (BIFO)
/
TCP (TCP)
/ ' (
/
.
XX
YY
IO
TCP
(
(=
/
'2
.
.
/
/
/#
$
/#
'2
%0
/
''2
/
/#
$
'2
' ( '2
'2
'2
,
11?
;
DSP_B (TMS320C6203)
XX
YY
IO
TCP
Bus_4 (BIFO) Bus_2 (BIFO)
.
/
/ '
*
/ ' (
Bus_1 (BIFO)
DSP_A (TMS320C6203)
(
.
16
'
'
/ '
'
$ '
'
'
'
.
'
(
('
.
.
.
(
11?
*
.
'
(
;
7
('
.
DSP_C (TMS320C6203)
XX
YY
IO
TCP
Bus_3 (BIFO)
DSP_D (TMS320C6203)
XX
YY
IO
TCP
/.
(
' ($
7
$5 (
(
Fig. 15 – SynDEx UMTS transmitter application graph
Fig. 17 – SynDEx Pentek+PC hardware platform graph
Fig. 16 shows the Pentek platform HW architecture with
four TI C6203 DSPs and an ADC.
This gives the opportunity to rapidly evaluate the pertinence
(or not) of new design choices at a very low cost, in real
conditions. This is exactly the opposite of a manual design
methodology where any architecture exploration is very costly.
ADC_1 (DAC)
VIM
VIM_1 (BIFO)
Bus_1 (BIFO)
DSP_A (TMS320C6203)
XX
YY
IO
TCP
XX
YY
IO
TCP
DSP_C (TMS320C6203)
XX
YY
IO
TCP
DSP_B (TMS320C6203)
Bus_3 (BIFO)
DSP_D (TMS320C6203)
XX
YY
IO
TCP
Fig. 16 – SynDEx Pentek hardware platform graph
D. Very fast platform extension
We saw in the previous paragraphs that the porting of the
UMTS application on the Pentek platform is done at no cost, as
the SynDEx libraries already exist from the FM case study.
It may also be necessary sometimes to rapidly investigate what
E. Very fast application extension
The extension of the platform towards the general purpose
processing side is experimented in a cross-layer design mixing
video and radio processing. We consider the following
application already discussed in [40]: a combined UMTS FDD
/ MPEG-4 implementation.
This cross-layer implementation benefits from two designs
made in two different research contexts but using the same
methodology proposed here. On the one hand, the SDR design
approach we are illustrating in this paper, and on the other
hand an image processing perspective to investigate MPEG-4
coding features. These two designs are being done in a
completely separate manner in 2 distinct SynDEx projects.
Each of the projects just has to connect to the same TCP
socket, on the PC side for the MPEG-4 application, and on the
DSP board side for the UMTS radio. Connection is obtained
automatically between the two applications and MPEG-4 data
are transmitted through a UMTS link.
F. Rapid prototyping on a new platform
Now the context of the porting of this complex application
gathering UMTS and MPEG-4 applications is considered,
which constitutes another of the issues to be faced in SDR
design. It has been illustrated in [40] in the case of a Pentek
platform composed of two C6203 DSPs, and a Sundance
platform having two C6416 DSPs. We suggest referring to
[40] for more information about hardware platform description
model and corresponding developed libraries. The schematic
of Fig. 18 summarizes the porting scenario.
UMTS
Modulation
MPEG-4
Coder
FIFO
PC
TCP
PCI BUS
FPGAs
+
DSPs
Sundance
Pentek
Vitec
MPEG-4
Decoder
UMTS
Demodulation
FIFO
SDB
BIFO
FPGAs
+
DSPs
Sundance
Pentek
FIFO
TCP
PC
+
DSPs
PCI BUS
Vitec
Fig. 18 – MPEG-4 over UMTS FDD
Another platform used in this work is a desktop computer
associated with the VP3-pmC multi-DSP board. The PC
consists of an Intel Core 2 Duo CPU at 2.2 GHz. The VP3-pmC is a parallel programmable processing platform dedicated
to professional video applications like MPEG4-AVC/H.264
real-time encoding and MPEG2 to H.264 trans-coding. This
platform contains 5 DSPs TMS320DM642 running at 720
MHz clock rate, each of them containing a SDRAM of 32
MBytes and a FPGA hardware co-processor to manage the
communications between all DSPs on the platform and the
communications with the PCI-Host. All transfers between
DSPs are managed by 5 DMA controllers inside the FPGA.
This platform has been previously described under SynDEx
tool in [53]. The UMTS application is automatically
prototyped on this platform and takes advantages of the
methodology: no deadlocks, functional checking of the
application portability and automatic multi-processors
implementation. Due to the small internal memory, TI cache
memory is automatically used as presented in [52]. Timings,
given in TABLE III, show that increasing the number of
DSPs for this application is not worthwhile, since we obtain
only a 1.35 acceleration factor from one to two DSPs and then
no further acceleration whatever the number of DSPs, up to five.
TABLE III
TIMINGS OF THE UMTS TRANSMITTER OVER A NEW PLATFORM

| 1 DSP | 2 DSPs | 3 DSPs | 4 DSPs | 5 DSPs |
|---|---|---|---|---|
| 14.3 ms | 11.3 ms | 11.3 ms | 11.3 ms | 11 ms |
Results on this platform are slower than the real-time constraint imposed
by the standard, which is 10 ms per frame. This shows that the application
graph has to be redefined to extract more potential parallelism
so that the hardware platform can perform more parallel
execution.
G. FPGA implication
Another extension of the design space exploration is to
17
combine SW and HW processing, in the sense of processors
and FPGAs. This could be the way to solve the real-time issue
met with previous paragraph scenario.
Thanks to the development of VHDL M4 libraries, the
proposed methodology makes it possible to generate VHDL as
easily as C language code from SynDEx. The only limitation
has been exposed earlier: SynDEx is not appropriate to
automatically optimize the scheduling and partitioning of
multiple HW IPs since it is based on a sequential processing
model between IPs. But all the other features of the
methodology are valid, such as automatic code generation of
communications and synchronization means. And in the
particular case of only one IP inside a FPGA, it means that the
restriction is null and the FPGA is used as a hardware
accelerator.
The UMTS FDD baseband transmitter most demanding IP
in terms of processing power is the pulse shaping filter. We
propose to map the UMTS receiver in the hardware platform
of Fig. 19 to accelerate the receiver execution.
PC (PENTIUM)
TCP
Personal Computer
ADC_1 (DAC)
VIM
VIM_1 (BIFO)
XX
YY
IO
TCP
VIM
VIM_1 (BIFO)
TCP (TCP)
Bus_1 (BIFO)
DSP_A (TMS320C6203)
FPGA_1 (fpga)
DSP_B (TMS320C6203)
XX
YY
IO
TCP
DSP_C (TMS320C6203)
Bus_3 (BIFO)
DSP_D (TMS320C6203)
XX
YY
IO
TCP
XX
YY
IO
TCP
Fig. 19 – SynDEx Sundance+FPGA hardware platform graph
Results of TABLE IV show the timings obtained after the
implementation without or with FPGA. FPGA is used as an
accelerator and permits to respect real-time constraints since a
UMTS FDD frame has to be generated every 10 ms and the
combination of a Xilinx Virtex2 FPGA and a TI C62x DSP
permits to decrease the frame execution to 9.9 ms.
TABLE IV
TIMINGS OF THE UMTS TRANSMITTER WITHOUT AND WITH FPGA ON THE
SUNDANCE PLATFORM
Target
Sundance
Pentek
Configuration
1*C64x
1*C62x
Time/frame
MFL ratio
15.9ms
60%
20.2ms
84%
1*XC2V
+
1*C62x
9.9ms
32%
H. MPEG-4 Decoder rapid prototyping
An MPEG-4 decoder description has been built for intra
pictures in [54] according to video MPEG-4 texture decoding
libraries, whose operation granularity fits to level of IDCT,
VLC and dequantization within a block. Description
granularity has a significant impact on the final
implementation. The MPEG-4 decoder in [54] is extended in
[50] to Simple Profile. MPEG-4 natural texture coding tools
divide intra or predictive pictures into macroblocks, composed
of four 8x8 blocks of luminance channel, and the associated
8x8 blocks of chromatic component. Each MB operation has
been optimized for a DSP implementation: interleaving loops,
conditional tests leading to pipeline rupture, no dynamic
allocations. Our implementation is open source started from
xvid decoder (http://www.xvid.org) and received the
agreement of xvid team to be the first porting over a DSP.
Thanks to the methodology developed here, we would like
to emphasize how fast an application can be ported from one
platform to another. In this case, this MPEG-4 decoder has been
quickly ported over several platforms presented earlier in this
paper: VITEC, Pentek and Sundance. It can decode in real-time sequences up to 2048 Mbps with a VGA resolution at 60
frames per second on the TI C6416 DSP at 1 GHz. The data-flow application development phase, in atomic operations
enabling parallelism expression, took around one person-year
of work on Sundance platform. Then the porting over the 2
other platforms is instantaneous (less than one hour time) as
soon as libraries of TABLE I in V.A.b) are available. This
proves the efficiency of the design methodology proposed in
this paper.
VII. CONCLUSION
We have discussed and tried to convince the reader about
the advantages of the proposed methodology for SDR
prototyping. This methodology has been derived from years of
experience in terms of experimentations and permits now to
produce prototypes with short delays. It can also be used by
students because of its simplicity.
We are convinced that SDR design will not be solved by a
brand new design methodology. Moreover, SDR design
perspectives are as numerous as radio design perspectives
currently are.
The main point to be stressed in this methodology is the
gathering within a unique framework of several concepts and
tools that match together. Concrete limits are defined and clear
objectives are targeted. A component based approach is the
basis of the methodology, and combined with efficient and
realistic automatic transformations, it permits to propose a
heterogeneous design strategy for SDR prototyping, and at a
larger scale the design of any embedded equipment.
Other tools may be chosen with respect to the rules given in
Fig. 4. Moreover, other tools will have to be added to the
proposed flow in order to enhance its current capabilities. In
particular, we are investigating solutions based on
higher level design techniques using UML modelling. Benefits
18
from automatic co-design advances in general will help as
well.
A future step of the design solution we propose consists in
merging the design methodology exposed in this paper with
the reconfiguration management that we are investigating in
other studies. This will constitute the ultimate level for SDR
design we are aiming to attain.
REFERENCES
[1] J. Mitola, “The software Radio Architecture,” IEEE Communications
Magazine, May 1995.
[2] J.H. Reed, B. D. Woerner, “Software Radio: A Modern Approach to
Radio Engineering,” Prentice Hall, 2002
[3] W.H. Tuttlebee, “Software Defined Radio: Origins, Drivers and
International Perspectives,” John Wiley & Sons, 2002.
[4] A. A. Kountouris, C. Moy, L. Rambaud, “Reconfigurability: A Key
Property in Software Radio Systems,” in Proc. First Karlsruhe
Workshop on Software Radios, Karlsruhe, Germany, 29-30 Mar. 2000.
[5] P. Demestichas, G. Vivier, K. El-Khazem, M. Theologou, “Evolution in
Wireless Systems Management Concepts: from Composite Radio
Environments to Reconfigurability,” IEEE Communication Magazine,
May 2004.
[6] A. A. Kountouris, C. Moy, “Reconfiguration in Software Radio
Systems”, in Proc. Second Karlsruhe Workshop on Software Radios,
Karlsruhe, Germany, Mar. 2002.
[7] J.P. Delahaye, C. Moy, P. Leray, J. Palicot, “Managing Dynamic Partial
Reconfiguration on Heterogeneous SDR Platforms”, in Proc. SDR
Forum Technical Conference’05, Anaheim, USA, 15-17 Nov. 2005.
[8] X. Revés, A. Gelonch, V. Marojevic, R. Ferrús, “Software radios:
Unifying the reconfiguration process over heterogeneous platforms,” in
EURASIP Journal on Applied Signal Processing, vol. 2005, issue 16,
September 2005.
[9] S. Paquelet, C. Moy, L-M. Aubert, “RF Front-End Considerations for
SDR Ultra-Wideband Communications Systems,” RF Design, Jul. 2004.
[10] C. R. Anderson, “A Software Defined Ultra Wideband Transceiver
Testbed for Communications, Ranging, or Imaging,” PhD dissertation at
Virginia Tech, 2007.
[11] J.-P. Delahaye, C. Moy, P. Leray, J. Palicot, “Partial Reconfiguration of
FPGAs for Dynamical Reconfiguration of a Software Radio Platform,”
in Proc. of IST Mobile and Wireless Communications Summit,
Budapest, Hungary, June 2007.
[12] G. Gogniat, T. Wolf, W. Burleson, J-P. Diguet, L. Bossuet, R. Vaslin,
“Reconfigurable Hardware for High-Security/ High-Performance
Embedded Systems: The SAFES Perspective,” in IEEE Transactions on
Very Large Scale Integration (VLSI) Systems, Volume 16, Issue 2,
Feb. 2008, Page(s):144 – 155
[13] V. Rana; M. D. Santambrogio; D. Sciuto, “Dynamic Reconfigurability
in Embedded System Design,” ISCAS’07, IEEE International
Symposium on Circuits and Systems, p.2734-2737, May 2007
[14] J. Bier, “Use a Microprocessor, a DSP or both?,” in Proc. Embedded
Systems Conference, April 4, 2007
[15] C. Szyperski, Component Software, Beyond Object-Oriented
Programming, Addison-Wesley, 1998.
[16] www.omgmarte.org
[17] C. Moy, M. Raulet, S. Rouxel, J.P. Diguet, G. Gogniat, P. Desfray, N.
Bulteau, J.E. Goubard, Y. Denef, “UML Profiles for Waveform Signal
Processing Systems Abstraction,” in Proc. of SDR Forum Technical
Conference, Phoenix, USA, Nov. 2004
[18] J. Laurent, E. Senn, N. Julien, E. Martin, “Functional Level Power
Analysis: An Efficient Approach for Modeling the Power Consumption
of Complex Processors,” in Proc. DATE’04, Paris, France, 2004.
[19] D. Elléouet, Y. Savary, N. Julien, “An FPGA Power Aware Design
Flow,” Lecture Notes in Computer Science, Springer, vol. 4148, 2006.
[20] S. Rouxel, J.P. Diguet, N. Bulteau, J. Carre-Gourdin, J.E. Goubard, C.
Moy, “UML Framework for PIM and PSM Verification of SDR
Systems,” in Proc. of SDR Forum Technical Conference, Anaheim
(USA), Nov. 2005
[21] “Joint Tactical Radio System (JTRS) Standard Modem Hardware
Abstraction Layer Application Program Interface,” version 2.11.1, 02
MAY 2007
[22] G. Gaillard, E. Nicollet, M. Sarlotte, F. Verdier, “Transaction Level
Modelling of SCA compliant Software Defined Radio Waveforms and
Platforms PIM/PSM,” in Proc. of DATE’07, 2007
[23] C. Lucarz, M. Mattavelli, J. Thomas-Kerr and J. Janneck.,
“Reconfigurable media coding: a new specification model for
multimedia coders,” Proceedings of the IEEE Workshop on Signal
Processing Systems, 2007. Pages: 481 - 486.
[24] S. Bernier, “SCA Reference Implementation,” http://www.crc.ca/scari/
[25] “Open SystemC Initiative, SystemC v2.0.1,” http://www.systemc.org
[26] F Charot, M Nyamsi, P Quinton, C Wagner, “Architecture Exploration
for 3G Telephony Applications Using a Hardware-Software Prototyping
Platform,” Proc. of Computer Systems: Architectures, Modeling and
Simulation, 2003, Amos, Greece.
[27] A. Koudri, D. Aulagnier, D. Vojtisek, P. Soulard, C. Moy, J. Champeau,
J. Vidal, S. Lecomte, “RTL Code Generation from MARTE Models,” in
Proc. of Modeling and Analysis of Real-Time and Embedded Systems
with the MARTE UML profile at DATE’08, Munchen, Germany, 10-14
March 2008.
[28] http://www.mopcom.fr
[29] A. G. Kleppe, J.B. Warmer, W. Bast, “MDA Explained - The Model
Driven Architecture: Practice and Promise,” Addison-Wesley, 2003
[30] S. Rouxel, G. Gogniat, J.P. Diguet, J.L. Philippe, C. Moy, “From MDD
Concepts To Experiments And Illustrations,” in Schedulability Analysis
and MDD, J.P. Babau, J. Champeau, S. Gérard, Ed. International
Scientific and Technical Encyclopedia, September 2006, pp. 111-130
[31] B. Plunkett, J. Watson, “Adapt2400 ACM Architecture Overview,”
QuickSilver Whitepaper, 2004
[32] R. Rabineau, D. Lattard, Y. Durand, M. Lobeira and JP Rossi, ”Flexible
Test-Bed for B3G Systems,” in Proc. of IST Mobile and Wireless
Communications Summit, Mykonos, Greece, June 2006.
[33] Y. Durand, C. Bernard, and D. Lattard, “FAUST: On-chip distributed
architecture for a 4G baseband modem SoC”, in Design & Reuse IP-SoC, Grenoble, France, Dec. 2005
[34] V. Bose, “Design and Implementation of Software Radio Using a
General Purpose Processor,” Ph.D. thesis at MIT, June 1999
[35] T. Turletti, D. Tennenhouse, “Complexity of a Software GSM Base
Station,” in IEEE Communication Magazine, February 1999.
[36] A. Hoffmann, H. Meyr, R. Leupers, “Architecture Exploration for
Embedded Processors with LISA,” in Kluwer Academic Publishers
ISBN 1-4020-7338-0, Dec 2002.
[37] P. Coussy, C. Chavet, P. Bomel, D. Heller, E. Senn; E. Martin, “GAUT:
A High-Level Synthesis Tool for DSP applications,” in High-Level
Synthesis: From Algorithm to Digital Circuit, Springer, to appear, 2008
[38] D. Kammler, L. Godard, I. Gomez, “Second Report on ASIP Design
Methodologies,” NEWCOM report DR4.4, February 2007.
[39] J-P. Calvez, V. Perrier, “SOC Architecting and Design with CoFluent
Studio, Concepts and Methodology - Part I,” available at:
http://www.cofluent.com
[40] M. Raulet, F. Urban, J.F. Nezan, C. Moy, O. Deforges, Y. Sorel, “Rapid
Prototyping for Heterogeneous Multicomponent Systems: an MPEG-4
Stream Over an UMTS Communication Link,” in special issue on
Design Methods for DSP Systems of Eurasip Journal on Applied Signal
Processing, Kluwer Academic Publishers ; vol. 2006, pp. 1-13
[41] J. Muttersbach, T. Villiger, H. Kaeslin, N. Felber, W. Fichtner,
“Globally-Asynchronous Locally-Synchronous Architectures to Simplify
the Design of On-Chip Systems,” in Proc. of 12th IEEE International
ASIC/SOC Conference, September 1999, p. 317-321
[42] A.R. Rhiemeier, “Benefits and Limits of Parameterized Channel Coding
for Software Radio,” in Proc. of 2nd Karlsruhe Workshop on Software
Radios, Germany, March 2002.
[43] J. Palicot and C. Roland, “FFT: a basic function for a reconfigurable
receiver,” in Telecommunications, 10th International Conference on,
vol. 1, pp. 898–902, 2003.
[44] C. Moy, J. Palicot, V. Rodriguez, and D. Giri, “Optimal determination
of common operators for multi-standards software-defined radio,” in
Proc. of 4th Karlsruhe Workshop on Software Radios, (Karlsruhe,
Germany), March 2006.
[45] V. Rodriguez, C. Moy, J. Palicot, “Install or invoke?: The optimal
tradeoff between performance and cost in the design of multi-standard
19
reconfigurable radios,” Wiley InterScience, Wireless Communications
and Mobile Computing Journal, Special Issue on Cognitive Radio,
Software Defined Radio And Adaptive Wireless Systems, vol 7, issue 9,
Pages 1143 - 1156
[46] J-P. Delahaye, “Plate-Forme Hétérogène Reconfigurable : Application à
la Radio Logicielle,” Ph.D. thesis, Université de Rennes 1, 18 April 2007
[47] http://www-rocq.inria.fr/syndex/
[48] T. Grandpierre and Y. Sorel, “From algorithm and architecture
specifications to automatic generation of distributed realtime executives:
a seamless flow of graphs transformations,” in Proceedings of 1st ACM
and IEEE International Conference on Formal Methods and Models
for Co-Design (MEMOCODE ’03), pp. 123–132, Mont Saint-Michel,
France, June 2003.
[49] M. Raulet, M. Babel, J.-F. Nezan, O. Deforges, and Y. Sorel, “Automatic
Coarse Grain Partitioning and Automatic Code Generation for
Heterogeneous Architectures,” in Proc. SIPS 2003, Seoul, Korea,
August 27-29, 2003.
[50] M. Raulet, “Optimisations Mémoire dans la méthodologie «adéquation
Algorithme Architecture» pour Code Embarqué sur Architectures
Parallèles,” PhD thesis, INSA of Rennes, France
[51] T. Grandpierre, C. Lavarenne, and Y. Sorel, “Optimized rapid
prototyping for real-time embedded heterogeneous multiprocessors,” in
Proceedings of 7th International Workshop on Hardware/Software
Codesign (CODES ’99), pp. 74–78, Rome, Italy, May 1999
[52] F. Urban, M. Raulet, J.-F. Nezan, and O. Déforges, “Automatic DSP
cache memory management and fast prototyping for multiprocessor
image applications,” in 14th European Signal Processing Conference,
Eusipco’06, Florence, Italy, September 2006.
[53] A. Maccari, J.F. Nezan, F. Urban, M. Raulet, “Interconnected
distributed RAM in SynDEx,” in Workshop on Design and
architectures for Signal and Image Processing, DASIP 2007, Grenoble,
France.
[54] J.-F. Nezan, O. Deforges, and M. Raulet, “Fast prototyping
methodology for distributed and heterogeneous architectures:
application to Mpeg-4 video tools,” Design Automation for Embedded
Systems, 2005.
Christophe Moy was born in 1972 in La Roche sur Yon, France. He
is an engineer of the INSA (National Institute of Applied Sciences),
Rennes, France, 1995. He received his M.Sc. and Ph.D. degrees in
Electronics in 1995 and 1999 from the INSA. He worked from 1995
to 1999 on spread spectrum and RAKE receivers for the IETR
(Institute on Electronics and Telecommunications of Rennes). He
then worked 6 years at Mitsubishi Electric ITE-TCL research lab
where he was focusing on Software Radio systems and concepts,
including digital signal processing, HW and SW architecture, co-design methodology and reconfiguration. He also addressed the
design of SDR impulse UWB systems. He represented Mitsubishi
Electric at the SDR Forum and worked on the French research
program A3S (design of SDR through a Model Driven Architecture
UML-based methodology), as well as the IST project E²R phase 1.
He is now an Assistant Professor at SUPELEC, the oldest French
engineering institute on Electronics. His research is done in the
SCEE lab of the IETR, which focuses on Software and Cognitive
Radio. He addresses heterogeneous design techniques for SDR, as
well as cross-layer optimization topics. He participates in the IST
Network of Excellence NEWCOM++, in the flexible radio work
package. He is also involved in E²R phase 2 and several French
RNRT projects on SDR called Idromel and Mopcom.
Mickaël Raulet was born in 1977 in Saint-Brieuc, France. He
received his postgraduate certificate in signal, telecommunications,
images, and radar sciences from Rennes University in 2002, and his
Engineering degree in electronic and computer engineering from
National Institute of Applied Sciences (INSA), Rennes Scientific and
Technical University. Next in 2006, he received a Ph.D. degree from
INSA in electronic and signal processing in collaboration with the
software radio team of Mitsubishi Electric ITE (Rennes –France). He
is currently in the Institute of Electronics and Telecommunications of
Rennes (IETR) where he is a research engineer in rapid prototyping
of video standard compression algorithms on embedded architectures
(multi-DSP architecture) and he is the Image Group project leader of
French Media and Network project such as Scalim@ges (MPEG-4
SVC main topic) and Mobim@ges (DVB-H main topic). Since 2007,
he has been involved in the ISO/IEC JTC1/SC29/WG11 (better known
as MPEG) standardization activities as a Reconfigurable Video
Coding expert.
His interests include video standard compression and
telecommunication algorithms and rapid prototyping on multi-DSP
architectures from Texas Instruments.
20
'-
$
"#0 -
504
-
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
0 (
#
A
,
9
"
, '< 0 '#0.
?
;B
+0
;
# "
0
5 $
0 (1#/#"("#
/ +<
+" 1
! ( 3# #)
3
+0
+0
!
"
#" $
&'%
%
() *
"+
,
-!
"
-
#" $
-%
.
/
-
!
!
"# !
!
!
!
$
!
%
!
& '#
(
'
#
(
'#
)
#
)#
$
'#
(
'#
$
(
(
$
'#
( (
!
*
+(, $
*
.
!
!
$
*
0
0
#1
0
0
#
"
0
2
0
"
"
0
#
0
0
34
0
#
#
5
&
6 3
74
3
84
3
94
-
#
3
:4
-
;
#<
#
#
5
+
6
?
"
=
#
#
#
'&
"
&
5& 6
5 &&6
0
:
@:
"
- *
AB !
#
&
"
5
0
#
)
#
#
#
6
"
"
5!6
&
#
#
0
+
0
- A0
0
"
#
#
0
0
=- )
3
>4
"
-
0
" #
#
#
- *
0#
- )
0
- *
&
5
=
0
-
"
0
=
# 3
74
3
94
- )
6
5
0
6
&
#
#
#
D
- )
C
"
5
!%% 6"
# 5
+%! 6"
- *
5 &% 6"
0
E
-F
#
E
"
G
- *
- H
5
6
0
=3
I4
- )
#
#
0
- )
0
=
0
-
E
"
#
#
E
0
"
#
- *
J
0
#
&
-
"
34
-
#
#
=
0
-
#
1
"
#
"
- *
#"
"
"
0
E
#
#
-)
- *
0
"
1
-
"
#
=
0
"
0
#
-*
0
#
0
="
0
"
*
?
0 -H
7"
- &
E
"
E
- &
0
E
E
- +
#
#
0
" 0
#
0
0
"
- *
9
-)
0
#"
:- *
8
"
#
"
#-
7
<0
"
E
#
=
34
3
@4
-
0
=
#
# 5
#
#
- +
?
# &
6 0
#-
56
+
34
+
56
"
0
#
1
•
1
•
"
1
=
•
"
"
1
"
0
&
-
!
)
"
" 0
#
$
#
"
0
"
=
-*
0
+
%
$
&
-
0
#
#
=
"
7-
&%
;&)
#
0
0
'
#
#
#
;&)
1
"
" 0
" 0
0
#
"
"
"
"
- *
0
=
=
8
#
#
0
-)
#- *
#
#
0
"
-)
"
#
#"
"
- )
0
"
5
K
=
#
#
"
0
"
#
"
0
=
#
6
"
"
- *
E
0
"
E
"
#"
-
0
- H
=
3
L4
" 0
"
?
=
"
5
#
6
#
<
0
#
(
-H
=
0
#- K
- *
"
#
!%&
"
#
-
0
=
E
=
0
0#
=
3M4
A
0
•
0
•
E
•
#
#
0
1
34
"
#
=
E= 0
#
374
"
384
-
#
E #
?
- *
0
"
"
#
3
:4
394
3:4
- ;
- +
#
5- -
6
"
#
"
*K
#
#
3>4
"
0
#- *
#
E E
E
-
#
- H
#
0
#
E
E #1
0
#"
#
#
"
"
"
-F
-
9
() $
H
0
*
+
80
-
#
smart
sub-system
analysis
stimuli
decision
learning
user
SDR communication
sub-system
network
environment
Application layer
sensing
means
Multiple physical layers
Multiple
Multiplephysical
physicallayers
layers
hardware
*
&
#
E
#
adapting
means
…
electro-magnetic
environment
*
orders
) $
5
+
6
= 5
6
0
5
#
#
0
6
-
G
0
-
+
"
E #
=
&
" 0
1
-&
- +
+
0
#
E
-*
0
:- *
=
#- *
#
#
"
+
0
−
#
−
−
0
−
E
1
+
#" &
#
"
0
5
!&
#"
"
="
"
"
-6"
E
" +%!
" ' *&" H +"
-6"
#N
"
5
#
0
5
&A 6"
"
1
1
*
A
81
-
=
"
-
-
0
"
="
=
1
"
0
5
"
6"
-
#
=
3I4
-
0
0
#" 0
- *
0
0
G
=
0
= 0 0
=E
?
=
?
-;
#
=
-
0
"
=
- H
0
- H
"
0
#
#
0
-
#0 #"
0
=E
:
E
- )
#
0
=E
,
#
&
#"
) $
"
-*
E 0
E
-+
#- )
0
E
6
" H+
H
"
0
" F
#
"
&#
% A
KFE
6- *
*
#
'HF 3L4
#0
0
#
10
5
E
0
#" 0
#
E
5
0
6"
#
-
&
62H
-F
E
:
"
" O &"
"
-&
#
&
E
#
0
- A
0
"
0
#
-*
#0
"
#
0
#
6
0
0
"
:
&
-+
+
"
3@4
"
#
5
"
F
0
E
=
#
A
*
#
#
E
' *& 5
!
)*& 5
)
0
#
!&
" !%&
*
0
-
*
0
#
#
"
#
(
- *
0
&
- H
#
0
0
" 0
3
7M4
-
0
#
0
./
#
#
6
#
#
#
5
G
#"
#
-*
#
-
5
#
# ;&)
#-
3
I4
6- *
-;
76-
#
0
6- *
-F
"
- *
#
#
#
-
&
#
#
0
=
3
I40
5
#
./
0
+
"
?
*
5
#
-*
&
"
-
0
0
#
"
-;
0
0
0
#
E #
G
#
6
#
#
0
;&)
1
#
5
% P
#
-
>
*
#
" 0
=
0
0
"
-
0
9"
0
3
7 4
- +
"
E
0
=
"
#
#
-*
+
0
-
E
0
#
=
E
?
-
=
E
"
-
#
E
,
E #-
0/
Li_ReMU
5
Li_CRMU
6
,
,
-
0
*/
1
2
2
=
#
"
-
#
#
#
"
#
-
"
*
#
-
#
0
#
#
"
= "
0
#- *
#"
0
#-
Q
0
" 0
-
- )
0
Q
#
#
#-
H
0
7
#
E
7Q
"
-
7Q
0
'
8Q
"
" #
0
'-
#
&%
+%! - *
8Q
'
-
#
8"
#
8Q
'
6
7
8
"
#
5
=
'
#
#
-
)
?
#
=
•
+
:-
" 0
E
Q
0
#
- *
=
#"
#
#
-
Q
"
Q
5
E
0
=
#
" %
6"
"
#
I
0
7Q
'- )
•
7Q
'
8Q
1 56
'
=
#N 5 6
#
#
#
-)
# #
0 #
7Q
(
5
7Q
- *0
Q
'- 7Q
HE
'
6
#
Q
-
•
8Q
'
#
-
8Q
'
1 56
8Q
8Q
'
#
#
0
#
0 #
=
0
'
"
"
0
N5 6
7Q
&
7Q
'"
=
-
8Q
'
"
'
-
Reconfiguration Management
Level 1
Cognitive Manager
L1_CRM
Reconfig. Manager
L1_ReM
Standard Set
Level 2
L2_CRMU
L2_ReMU
… L2_CRMU
L2_ReMU
…
Level 3
L3_CRMU L3_ReMU
L3_CRMU L3_ReMU
…
reconfiguration
sensing
orders
information
data from
previous
operator
operator
operator
3
%
'
$
data to
next
operator
./
"
$ "
)
#
=
- &
#
-)
=
#
= 0
+
#
#
0
Q
#
- *
0
#
- *
?
3
774
- )
"
#
= 0
"
"
0
8
=
-
@
3
H
#
#
=
N56
0
-H
0N5 6
E
N5 6
0
N5 6
E
0
N5
- *
- )
"
#
0
"
6
E
=#
0
"
"
"
"
"
-
#
0
-+
0 #
; G E;
'
5
'
#
- *
6 3
784
- '
#
#
0
- *
'
#- F
'
#
- )
=
#
G
#
5
"
- +
=
&
6
>
-*
E
'
- *
0
#
-*
- *
"
G
- *
- F
0
- *
#
0
#
G
#
G
E
=
R
#
E
" 0
0
-
5
'
5
R
0
#
./
6
./
6
5
A
+
./
&
./
&#
4
#
K
%
(
./
-
L
*
./
4
*
*
4
<
E
2
+
#
I
0
6
#
+
- H
: 0
E
#
0
E
#
0
"
?
#-
6
*
#5
4
?
./
0
#
#
#
#
- )
=
"
-*
0
"
5
;
;
6-
#
<
(
*
+
I
% $
0
-
5
;
6
#
E
-*
- *
=
0
G
6-
# 5
=#
5
#
- )
6
G
G
*
0
Q
-*
Q
#
Q
#
Q
Q
-
"
H
0
0
0
= 0
-
"
M
0
0
8Q
'
8Q
'
-*
"
#
H
- )
8"
+
@
8Q
'
8Q
'
#
7
0
0
8
*
$
0
- *
-
#
#
-
7
./
$
#1
•
0
=
•
"
•
5
) "
)
0
5
'6
) "
0
'66 0
"
•
0
•
=
"
0
"
# 0
"
"
5
'6
5
'6-
#"
-
5- -
"
7
86
-+
"
•
1
01
ST 7Q
'56"
•
71
ST 8Q
'56"
•
81
ST;
-
?
"
•
1
#
•
"
"
•
"
•
-
*
8Q
5 E
5 Q
6
7Q
#
0
#
0
6"
0
#
8Q
&%-
'56 5
#
'566 0
5
8Q
"
'
"
-
8Q
'Q
7Q
'
-
&%
+%!
-6
0
#
#
0
0
5
L" 0
'"
7Q
0
0
0
E
'
6
7Q
'
?
+%! "
+
=
=
- H
8Q
'
"
"
8Q
'Q
#
#
8Q
7Q
8Q
- H
'Q "
'- H
'Q 0
7Q
#
'
8Q
=
8
**
0
'QF 0
8Q
'QF-
&$
4
F
$
- )
E
"
0
#
0
#
0
5
#
#
5
=
=
#
0
0
0
"
=
-&
- *
0
6
0
- *
#
#
"
=
-+
9
6
./
M
#
0
-
$
#1
•
•
0
=
) "
)
) "
"
7
•
5
0
5
'6
0
'66 0
"
•
0
•
=
"
"
"
"
0
#"
-
=
=
- H
"
"
0
"
0
0
"
- )
=
#?
-
0
=
#
* *
"
=#
0
"
#?
= -*
#
- *
#
0
"
0
"
+
$-
#
- O
#
#
2)
+
0 #
"
#
"
0
"
#- & "
#
0
E
- +
#
#
#
= 0
#
#
<
"
0
- *
<
-*
"
- *
0
#
1
1
1..*
Capture Metric
Metric
1..*
Evaluate
Evaluate Metric
Analyse Metric
1..*
Analyse
1..*
has
1..*
Action
Define Action
Priority
Prioritize Action
Define Metric Strategy Action
List of Action
1
1
"
*
0 1
•
1
•
#?
1
#
-
*
0
5
'6
•
1
"
-F
"
8
•
%
?
1
5
"
#
•
&
#
6
1*
#
0
6
5
"
5
'6"
#
0
*
"
#
#
#
#
-
#
"
- *
"
•
0
#1
"
•
"
•
0
•
="
?
5
*
"
-6-
"
#
=
-*
* *
:;
)
" 0
E
#?
- H
"
0
H
0
"
#
=E
0
0
-
E
#
- )
0
=E
"
0
=
#
Q
- )
= 0
)
-
"
#
0
E
- *
=
#"
=
#"
-
"
C
=
D
#
"
0
Q
0
0
#
- )
=
#
"
"* (
0
Q
#
- %
"
-
"
"
#
#
"
#
- +
- )
#
#
"
=E
0
"
0
0
0
-
"
-
0
0
"
7
0
=5
#
"
-6
#
=
#
0
-
<
9
;
#
?
- *
0
0
#
#
-
#
#
"
0
#
"
-*
1
=
#
Q
"
"
#
"
-
#
"
#- &
"
#
#
#
- %
0
#
1
"
#
8
0
!%&
-
*+
8
&$
0
E
;
- &
#"
;
=
-
*
;
#
*
&$
8Q
'
8Q
'-
E
= +%!
&%
#"
0
"
"
-*
#
- +
0
=
H
-
*3
'
4
*
G E
#"
;G
+
#
#6 3
794
#-
G E
;+5
E; G
0
#
E
E
+
;+5
E
6
E&
E
5 & 6-
0
"
#
E
- R
3
7:4
#
#
)A )
#
E
- H
R
0
0
-*
#
=
E
&
0
- *
#
" 0
-)
"
"0
- )
:
0
#
#- *
"
"
=
#
#
- *
0
#
#
# 5
6
# =
#0
-
#
H
-
./
-
R
#
+3
7>45
0
5
=
+
E
7=
"=
7
"
=6"
0
E
"=
7
0
7
(
"
7=
"
B )
7
R
(
E
- *
#
*
5
'
7-
6
0"
+
+
6- +
I"
0
9"
-
-
*
(
#
2
0
&
./
R
E
- *
7=
#
-
E
#
E
-)
"
5
B )
6
R
- *
E
#
-*
#
#
"
- *
#
"
#
"
5
0
E
"
"
=
"
6-
+
:
0
+
3
774
-
"
- )
#
B )
6- )
Q
I"
#
#
=
#
B )
5
1
0
Q
>
- *
+
0
#
0
E
;
#
- *
&
# 0
#"
#
- F#
0
"
"
"
"
"
(
#
0
"
-*
-+
#
=
#
3
<
./
-
4
-*
#
"
0
"
5
"
-)
6
"
=
#
" O &"
0
<
I
#
#
"
H
"
-)
0
"
"
"
#
60
-*
#
0
5
#
0
- H
"
"
- H
0
0
=
-
=#
E
*
0 #
0
0
=
0
#
"
#
"
#
#
"
"
-
E
- *
#
-
" 0
"
"
#
&#
6- H
"
#
5 #
5
'
=
- *
6
#
0
=
"
- *
- ) 0
#
A
0
E
#
-)
#
0
0
-)
#
H
"0
0
"
0
=0
"
-
3
=
%
3
%
%
*
2
0
-*
-
#
-*
#
#
- *
"
#
5
#
E
6"
0
H
#
3
7I4
-+
#
#
#
- *
0
0
#
0
0
?
-H
@
3@4
*
-
F
&
0 1
7Q
'( 7Q
#
'
"
8Q
'( 8Q
- *
'
0
#?
: L3_ReMU
: L1_ReM
: L1_CRM
: L2_ReMU
func_fusion
: L2_CRMU
func_fusion
: L3_CRMU
BWA
BWA
0
:L3_ReMU
BWR
Input signal Operator
BWA
:L3_CRMU
BWR
+
Operator
BWR
>FH 1 F
FH 1 F
# 1 #
HK*1 H
1
:L3_CRMU :L3_ReMU
Cyclo
WVT
:L3_ReMU
Cyclo
"
0
0
#
K
:L3_CRMU
WVT
:L3_ReMU
Hspec
Operator
WVT
Operator
Cyclo
:L3_CRMU
Hspec
Operator
Hspec
Signal to analyse
Searching module
Analyzing module
5
=
*
0
F
-
&
"
1
H
" FH 1 F
*
#
"
H
5
FH 1
#
6
1
#
#" HK*1 H
5
6
5
- H
C
"
-*
- ;
#
-
"
#
" 0
#-
"
8Q
>
'-
#
7Q
#
+
"
#
FH "
?
#
'
7
D
"
0
8Q
EK
+%! 6
'
7Q
?
'-
7Q
'
" HK*
"
- *
Q
0
0
E
3@4
(
-H
0
#
- *
-%
+
#
0
- *
#
+
>
>
0
=
#
0
(
=- )
"
#
"
"
0
#
*
0
-*
0
#
*
#?
#?
#?
#
0
-
#
0
-
- &
#?
3@4
1
L
•
0
1
0
•
#
•
H
"
#1
0
EK
E
1
0
+
•
*
#
%
0
#
-
'
0
0
0
- )
5
- )
#- +
"
&"
1
3
#
"
"
" 0
?
"
-6"
"
0
=
"
I
0
0
Q
0
"
Q
"
7Q
'((+
Q
7Q
'((+
Q
=
Q
-H
= 0
5
0
0
"
#
0
"
"
#
-H
7Q
8Q
Q
#
-*
+
>6- *
'"
- H
Q
Q
'((+
Q
Q
0
#
=
"
-*
-H
#"
-
6
=
)
7M
3*
<
"
0
0
#?
+
@- *
-*
#
6%
6
0
7
6%
#
7Q
8Q
'(
(
'(
(
0
=
Q
Q
$
H
-
2
#
#?
- )
0
"
#?
#"
#
"
#?
0
+
#
L-
8
*
"
"
-*
E
"
#
"
0
#
=- +
#" 0
#
1
"
"
#
-)
5
7Q
"
'
"
0
- *
"
6
7Q
Q
'-
"
Q
Q
"
"
8Q
=
'((
"
0
?
"
-*
#" #
- *
#
0
)
- +
0
#
- *
-
7
*
0
0
0
0
#
0
- ; +
= 0
0
"
7Q
'
8Q
@56
MMM= ?
@56 0
8Q
-; +
'((
Q
0
-
Q)
8Q
- H
#
7Q
Q
'
- *
-
MMMQ+ 5MMM
#
#
+
+
#
F
"
60
#
0
7M-
9
>
*
$
:;
Q
=
- *
)
"
=
#
Q
0
-;
0
#
-
Q
=
- H
" 0
"
0
#
Q
-)
"
7Q
"
'
8Q
'-
3H
#
#
-
#
#
- *
0
-
-*
#"
#
#
0
-*
"
0
#
#
0-
"
" 0
# = 0
#
#
0
- )
-*
0
#
#
0
#
#
#
0
"
#
#
- *
-
#
0
0
5
- +
0
#"
= -
0
"
0
•
#
0 #
#
-H
- *
61
=
"
77
•
3
7@4
0
5#
6"
•
#
"
•
E
"
•
&
•
%
•
A 0
#
=
"
0
"
=
0
0
-
5
H
- F#
5
"
"
- *
5
-6 0
E
6
"
#
-
5
=
=
6
0
0
+
"
"
0
-
"
=
- *
0
#
0
#
- F#
A
"
- *
"
=
#
#
-
-
0
0
#
#
H
-
E
"0
=
#
#
-*
#
E
-)
0
0
"
"
"
-H
#
0
"
0
"
#
H(&H
0
0
"
0
)(;
0
=
#
"
=
-*
- )
"
=
# '
%
#
* 63
7L4
0
E
&#
"
0
(
=
5
1
&%"
"
0
E
#
5
-
0
0
"
'
"
-*
#
0
"
= "
- )
#
"
"
-6- *
#"
0 #
+%! "
0
=
- )
0
"
=
#
5
#"
0
---6
#
#
-
78
6
34$-
" C
1
% - -
"
3
74$
" C
*
)
&
#
)
)
-
0
&
*
-" &0
"
A
*
D
" )
0
D
"
# 7MMM
#
" LL7 E
;G
M- ML(A* - LL7-7>I@IM
3
84+- R- $
" C
&
$
E
EF
H
3
94$- -
- H
" C
&
0
1 F
E
=
&)%
7MM:18" 7I:E7@8
1
D
"
" 7MM7
#= " C
&
3
>4+
D
" '
A 0
" F-
%
3
:4&-
0
0
" 785
:6"
" C
&
%
A
# *
H
D
" )
- 7M E77M" +
=+
D
"
*
$
7MM:
=
A - M7E 8:"
- 8:E:8"
I" 7MM7
3
I4$-%-
# "
-
#" %-
&
#" $- %
%
D
"&
" C
+
#
%
*
"
5
'& 6" A
7MM:
3
@4$-
" !-
" C
D
" )
1
3
)
=
0
" %
H
4
" K - >" A - 9- 5LLL6"
-
8E @
3
L4 )* *
* =
3M4A = "
;
- &V
8 " C )*< U
" $- %
"
)
7MMI" F
3 4!-
"
-
0 "
-"
! 0
F
*
&
W
MI" E: $ #
" A - 7MMM
- &
" C
"
D
" *
"
" C
" A 0 P =" 7MM "
=
D
"
)
" X
*
0
%
)
+
X
D
" $-
- 78:E7:M0
=
?
)
5
)
6"
" LLL
394H- + "
A 0
F
E 0
&
"
K
# "C
D
H
#
-
384B- O " $-
?
#
-R ?
"C &
* 7MMME8@ "
374 -
#" $-%-
D
" )&*
"
"
-
<
- R
=D
")
?
"
- &
NC
%
$
&
EF
&
*
&
%
" + - 7MM@" K
1 7" )
1
% #
"
" - I9E@I
3:4 - ! ?
?"
-
" +-
" $- %
" C
1
" * 0
%
"+
D
"
A
3>4+
5
7@(M8(7MM>6 7MM>"
0
1((0
IMM
-
3I4%- %Y# Y
-
I"
- IL9E@M9
?
"
(
" $-
- I"
(
=
-
2G S
" M- &
J S 0
A 0
"
3@4 -
" $- %
*
3L4 &#
3
7M4 $-
"$
- F
D
" %)
#"
"
-
" &- %
W
M:" F
- A = " %" +-
=
D
" !
#
E7 7MMI
D
" '&)% ;W
MI" % ?
#"
IMM
" C
)
#" C A 0 &
"%
" 8EI
" C
'
"&
#" $-
#" F-
&
"
7MMI
EH
F
7MM:"
7"
" $- %
"
- %
" C
)
'
- II:EIIL
-A
;
" R- R
1
;
"
-
0
"
%
79
&
D
"
;
7MM@
3
7 4- !
"
-
#" $- %
&
3
774 - !
"
&
D
"
#" $- %
0
#"
W
M>" @E M$
R
H
7MM>"
=
&
)
7MM -
#=
"!
0
"
-
E :
" H& W
M@" R
&
(M8EMLE :"
3
794; !-
;+
"
3
7:4%- -
+
-
" +- +
( E
=#"
3
7I4 -
-&
" $- %
(
2
"
-
-
"
="
"
; G
(M8E MEM9" 7MM9
"
-
7-M" &
(-
" C
H
F #" $
#%
"
&
# " $- - $ ?
D
" A &"
H
1((000-
7-M
1((000-
3
7>4+- F
"
7MM@
3
784; !- ; ! '
!
" H
" C
D
":
!
*
" C
+
&#
-
+
;
#
&('
="
<
7MM:" &
*- !
G E
5
7MM:6
"
+
0
=-
" 7MM8
" C
0
H
D
" )
-
-
$ # 7MM8
3
7@4$-%-
# " %-
#"
-
#" C
%
5
'& 6" :EL A
3
7L4 '
%
%
D&
+
&
*
<
MI"
7MMI
* " F
" ; !
&
" ; !
A
1
(MIEM@EM9
7:
'-
B;
$
"#0 -
504
-
"
, '< 0 '#0.
.$
,$
# #$
03
1$ $
!
1 "
'-
'- "
'-
$
"#0 -
504
(
-
#+
#
1$ $
!
1 "
'-
'- "
"
(
#+
#
c (d '>
> 504<
6
9
6
B ?:
c ( d '>
_/
1
, E
.$
,$
# #$
03
"
,
D
_<
%
%
'0
< _1
9
D
> 504<
$
/ 5#" #
< " < @+
A
T0 3#0
/
B ?@ % "
$
"< ,
" C
5.
B
1<
"(
_<
;;? )
, '0
<
" S
<
c (=d
T0 3#0
$
"< '>
> 504< ,
5.
1<
, '0
_
'
"
( " C
.
5
+
#
5#"< !"5< 1!
.
#
>_< E#' / S
<
' (< " < +
T0 1 $
<1
E0*#$
" T< >
c (;d
'- 5
<*
E$
1 ,< *
+'>
1
5 >
(_< 5
# Z X >
c (@d'>
> 504< 5 X Y
! (!0!3$ #< >
1 "/
4
1 3 /< _ 5,
"1 /
# >
'
"0 ,
1< '>
> , , 33< _
1 # '
> 504< * Y
5
#
' +
<5
>< ; 5
>
, #< "
0 F ,< *
+ >
4< 3
. ,#
<*
+
Z
"
"(
< >
2< " <
;
c (:d *
>
5
.
T 3"$ T< 1
, 3V< #>
Z$ .T < "
# / V0,,$
<
2 .$
"$ F< '>
> 504<
- ,$
5$
'< '
10,Z$
3< _5
'
#
_< #> T
> Z X >
"
@?+:=< T
> ><
< B+
;
c (Bd '>
> 504<
T0 3#0
$
"<
2
c ( d"
'>
'>
B
# >
c ( d"
/ 2
#
5
+/
D
(
0
R
eE
! ,< '>
>
"
> 504< *
1
, '< 0 '#0.
6
_<
+/
"
_<
C
>
"
3 V 3< '>
5#" '
1
:%
:)<
> 504<
0 F ,< ! (!0!3$ #< *
+ >
1$
!
> 504< _/
511 '
#
2
X" >
(
(
511< $
(
<
:<
+ =
+"
#<
1<
A > ! ,#'- E<
T0 3#0
$
"<
>
C
< Z" 9;<
> 504< 5 X Y
, #< _- >+,
1
(
5
+
"(
R"
1
c (?d 5 X Y
, #< /
. 3< *
0
1 /0 ! "< 4 "0 ,< _
5
"(
5 !+; "
*
"
1" "(
<T C
>
=
<
@@7 " 7 :7:;=:?
1$
!
!0 .
.$
"$ F< _ 6
_< f
= ? ??? @
D
c ( d '>
1
*
$
<
#< *
, X_<
5 >
$
1 :;=:?<
+,
$
"
,$
'0#< _#C "
5
+!
-$
,$
_<
(
B@
'-
504
$
"#0 -
2
5
-
_< $
'
c ( d'>
1
$
5 'SB<
6
V< '>
C
<^ Z (Z
$
'
<E
B< $
<"
"(
B%
> 504< *
<"
B<
?<
T0 3#0
$
"< '>
T
> Z X >
<
+
c (Bd*
_5
"1 /
+ >
1 , - 4<
1(
# >
'
c ( d*
+ >
_< "1
1 , - 4<
1
/
# >
3 /T- <
1 , - 4<_
_< $
"# 5
<-
c ( d
3 /T- < *
,$
'0#< _ >
T
> Z X >
<
c ( d'>
'
B:
!$ $
< ]0
<^
<
1
;= R
X P
@:
> 504< _
" C
,
"1
9@<
>
,
>
'
<
C
2
.$
"$ F< "
"(
> 504<
'
< Z" S
> - '- 5 3$
<*
$
_<
6
<
> - '- 5 3$
<*
6
"
@
;<
,$
'0#<
_<
@
<
" < @+?
B
" ! $ < '>
< Z" S <
> 504< *
?+
,$
'0#< '>
> 504< *
+
#
$
"
SB< +@ A
B<
.
1 ,0 5 <
C
> 504< _1
9B< 1
6
"
C
6
" <
'
"
> 504< *
-
4< '>
"1
.
Z
"
(
< Z" 9 <
4< '>
:
+
5 - "-< '>
> 504<
>
> 0
L
( > $
" ! $ <*
c ( =d, U !01
1< '>
>
>
Z X >
" C
c ( @d
'- "
1
,$
'0#< ]$
>
5
'
C
> 504< *
(
=@+ ; < T
><
>
[ , #< _
_< $
5 '9@< .
<"
c ( d, U !01
1< '>
> 504< * 6
5
'
5
*
:< 5(X
<! & <
+ @%
'
Z
3 C X
'05
)
c ( ;d
'-
T0 3#0
$
"< '>
> 504< ,
5.
1< _
( " C
"(
_< /
T
> > Z X >
< Z" 9 <
:?+B;< T
> ><
< ?+=
c (:d
(
_< "
T
> ><
c (?d
>
'
.
6
'
c (;d"
#
! ,<
>
E$
301< * 6
,$
'0#< _ !
>
'>
L _<
"$ < #> FF$
F!
"
< '>
% " )<
c (@d
T (
1 "
$
,$
'0#< E
01 $
!
V< 1
0
5
+
" C
> Z X >
" C
<
01 $
!
*
Z
<
1$ $
!
)
> 504< *
'
; >T
c (=dE
#>
>&
.$
,$
# #$
03
6
,$
'0#< _/
"1 "(
+Z
.
@
'
_< '
C '
'
,$
'0#< _
6
_< @ >T
;=+;B< T
><
,$
'0#< '>
@?<
> 504< _, .
B
,$
'0#< '>
> 504< _
, '< 0 '#0.
6
_< @ >
S:< +
0
>
>
<5
3 C"
"
'-
$
"#0 -
504
-
"
'
B
B%
((
"
#
$
#+
#
"
!
cd * 5$
#0, < _#>
C
==%@) :+= < 5 ( ??@
cd *
(+ =+
5$
#0, < _'
_< >1
c
;d "
$
- 4T$
3< _'
*" '< =%@)<
c
@d _"1
/
c
Bd 5
5
>
<_ $
@ +@e5 (
( $
#
+
@
</
C
_< $
C
'- "
<
< =+
5
L <
"
_<
<$
".3+
<
"
<5 (
Z
C
'
7
_<
7
2
#
> 6
"
< ?+ = 5 (
=
C
% )
'3" '
cd 4
"0 ,< ]5
(
>
>
6
$
'
5
;;R@=< $ > < $ (< 5 ( ??;
c
?d 5 X Y
6
'-
)
>
>
>< "C
L/
3!-+.0 0 * 34< ]"
<^
B
>3! 4 3< _"
'
L
'
#
.
+
1 "
>
$
<
1$ $
!
"$ '0SB<
'0
@ 4 . X_< > 77CCC
g 27(
X7?B
g @g
c
:d . >
_<
1< _" C
5
'
@ + e$
".3+ = ?B + + =+
c
=d *
1
.$
,$
# #$
03
, #< _0
>
(
>
(
5
&
C >
(<^
'
>
$
%5 '" 9
?;)<
"(
5 >
9
_< #>&
'
<
:
c dCCC
cd
+
"
!
c d]*
-$ 5 $ < ].
<^
>
C
(< 5
#
"(
$
c=d " .
<
"
c@d V
c:d0
>
"
<5 "
<^ >
</ E
Z
1
77CCC L
>
<
!
c?d #"$ " %
> 77CCC
77CCC
7>
>< E 5
7Z
X >
5 (
C
B
L '>
'
" C
<
, (
77CCC
< ]#
E
"
> Z
7
,
7
5
$
57 "5<^
"'
7
"'
cBd !3
c dF
5
<
$
3
C
B
1 # 9B<
T
%*# ") "
<^
< ]"'
c;d! !
,
A
>
<
>>
77
j < ]"
C
(
"$ *
<^
@<
"
"(
7 >
7
>
/
<
"
#
:< "
>
@
)
7
, '< 0 '#0.
"
2
BB
'-
$
"#0 -
504
c d/''< ]"
3
c dE
1(
5
+
5
(#
X/
^<
.$
,$
# #$
03
#1
X
1$ $
!
3
1 "
+ =@<
'-
'- "
=@+@=<
B<
01 $
!
V< T
50 ""3
<
> # / V0,,$
<_
"
0
.
<
"
<5
+'
'15 _< $
$
"(
'
% $
5 ')< .
<!
(< "
1
$
@
c dMohamed GHOZZI, Mischa DOHLER, Francois MARX, Jacques PALICOT,
“Cognitive radio: methods for the detection of free bands”, Comptes rendus
Physique, Elsevier, 2006, vol. 7, no. 7, pp. 794-804
c=dDuminda THILAKAWARDANA, Klaus MOESSNER, “A Genetic Approach to Cell-
by-Cell Dynamic Spectrum Allocation for Optimising Spectral Efficiency in
Wireless Mobile Systems”, CrownCom’07, 31 July-3 August 2007, Orlando, USA
c;d>
77CCC
c@d#>
(!
C
;
7
31 $
<4
"0
>
'
=R = < 5
c:d5 X Y
,< ]/
>
<^
/
"
5 >
+5 > < /
5
<*
' +1
=
, #< _5 >
(
"
cBd*
1
6
9
,# - #< #L
L
'00
B< = A
0 A +0
c
= d
T
,0 k#
VV [< _"
C
> >
+ >
+/
6
C
,$
'0#< _
2 _< $
-
"
" ,$ 3#<
'>
"(
B 1(" 3
, '
0
5
C X > <
%
'
< 5,< 5
% T ,)_<
1= _$
(" >
$
"' "S:< $
&
5
! "#$
<,
L !$
03$
<]
1(
"
3 C/
1(
B< B+
B< 1
$
,$
'0#< _ 3 C '
5
L <A
^<
Z
=
(
_
1= = _
"
/
=
E0*#$
" T< ^
^< 5
B< .
<
'
,
6
.
<
1< 1
2
R= \
)
(
+1" #$':2_<
<
9
,
c
= d*
(
V+ 05 0< 0
3
0 +1
'
1(" 3
B<
$
$
"
3 C X <
c
==d'>
*
"
1 , - 4<_
_< #>&
c
= dV
$
3"
0, 31< ]//#
< #> < /
<
, !-0 Z 4 ,< 4
>
> //#
T <! & <
:
+ >
,
B
+
9
,$
'0#< '>
<^ $
'#9= '
c d
c?d*
>
+
2
'5
$
$
%5 50'01 9=)<
>
, '< 0 '#0.
>
>
(
'-
$
"#0 -
504
(
c
=:d
5
"
+ >
1B
_
'
< Z>
.
c
=Bd5 >
9
>
.$
,$
# #$
03
" >
1$ $
!
1 "
'-
'- "
_
6
1
/
$
<
_
!-0VV$
< _1
2
>
<
"
2
S
!
, '< 0 '#0.
2
'
_< #>&
BA
B?
Résumé
Le manuscrit de cette Habilitation à Diriger des Recherches reprend quelques thèmes de mon
parcours de recherche depuis 10 ans, et que l’on peut résumer ainsi. De la conception radio
classique pendant ma thèse (à base d’ASIC) j’ai vécu la transition vers la conception radio
logicielle (dans un laboratoire de recherche d’un grand groupe industriel) et enfin j’étends
désormais en parallèle mes travaux vers la conception radio intelligente (dans un laboratoire
de recherche académique). Cinq travaux sont repris dans ce mémoire, portant sur la radio
logicielle et la radio intelligente. Ils correspondent à cinq des thèses que j’ai encadrées. Ils
portent tout d’abord sur la contribution à une approche de conception pragmatique pour la
radio logicielle orientée composants. La radio logicielle en effet implique l’utilisation de
ressources de traitements multiples et hétérogènes, générant ainsi des difficultés de
conception non encore résolues dans leur ensemble par la communauté de l’architecture. L’un
des principes repose sur l’indifférence qu’il doit y avoir entre traitements logiciels et
matériels, quel que soit leur niveau de reconfigurabilité. Un effort de formalisation de la
conception radio logicielle est proposé dans un deuxième temps, dans une approche orientée
opérateurs communs. L’idée repose sur la description à différents niveaux de granularité des
opérateurs de traitement du signal afin de bénéficier de la réutilisation de motifs de calcul. Le
travail d’optimisation consiste à sélectionner le niveau de granularité adéquat suivant les
besoins du concepteur. La gestion de reconfiguration, qui est un point central si ce n’est le
point distinctif de la radio logicielle, est le cœur du troisième travail présenté. Sa
généralisation à la radio intelligente permet d’aboutir dans un quatrième cas d’étude à la
proposition d’une architecture de gestion radio intelligente : HDCRAM. Une modélisation de
cette architecture est proposée sous la forme d’un méta-modèle exécutable. Cela permet
notamment de disposer d’un simulateur de scénarios du comportement de l’architecture de
gestion d’une radio intelligente et de pouvoir la dimensionner pour chaque scénario. Enfin la
dernière étude porte sur la définition du concept de bulle sensorielle radio intelligente qui
porte particulièrement sur l’ensemble des capteurs d’un équipement de radio intelligente. Les
perspectives de ces travaux sont notamment d’étudier le troisième et dernier élément du
cercle cognitif simplifié (après la reconfiguration et les capteurs) qu’est la prise de décision.
Abstract
The manuscript of the present Habilitation à Diriger des Recherches covers a few topics of my
research activities during the past 10 years, from classical radio design, to software radio
design, and finally towards cognitive radio design. Five of my PhD students’ works on software
radio and cognitive radio are detailed here. First is a contribution to a pragmatic approach for
software defined radio design. It is a component-based approach. Software defined radio
indeed implies multiple and heterogeneous resources, which generates design issues that have
not been solved yet. A formalisation effort of software defined radio design is proposed in a
second work, using a common operators design approach. A graphical optimisation tool is
proposed to select the adequate level of granularity of signal processing operations depending
on the designer’s needs. Reconfiguration management, which is the main distinctive point of
software defined radio, is the heart of the third work presented in this document. The
generalisation of this to cognitive radio leads to a fourth work on the proposal of a completely
new cognitive radio management architecture: HDCRAM. A modelling approach of this
architecture is proposed through an executable metamodel. This also constitutes a simulator
of scenarios of the behaviour of a cognitive radio management architecture. Finally, the last work
proposes the definition of the sensorial cognitive radio bubble, which particularly tackles the
sensing side of cognitive radio equipment. Perspectives of these works consist in studying
the third and last element of the simplified cognitive cycle (added to reconfiguration and sensors):
decision making.

de la radio logicielle à la radio intelligente

Transcription

Documents pareils

Revue de presse ANGLAIS Semaine du 07 au 13 octobre 2013

ORAUX # NOM Prénom Titre O1* LE TILLY Elodie O2* FLAMMIER

Les vêtements/La Mode un collant un gilet un haut un jean un

Avec ces chaussures, il ne manque plus aux enfants qu`une

3 bedroom Apartment for sale

Papers of the Bibliographical Societ~y of Canada xxx On the other

Answers and Solutions - Digital Commons @ Butler University

The Solution to Drug Prices

Zing-E-Ling Répertoire Bob Marley

voir profil - Women`s World Summit Foundation