de la radio logicielle à la radio intelligente
Transcription
de la radio logicielle à la radio intelligente
* / 2 , +, ! "#$% -$ ,$ .0 1 , & % % 0 " 3#$ 4" % , #0 "% * 6 ,$ '0# %" " " , ' 7$ # ' 6 8' 6 9 $# ' " ) . ( ' 5 ) ) , ') 6 5 " , ' '3 " : :; () ) '- $ "#0 - 504 - " , '< 0 '#0. .$ ,$ # #$ 03 1$ $ ! 1 " '- '- " '- 504 $ "#0 - - .$ ,$ # #$ 03 "055 $ 10""$ 1 #$ $ ' 9 > 1$ $ ! '- 1 " = ; > 9 > 6 9 ; ; ;= ;; @ > : ? ? ? ? ? > 92 > > #>& + ; ; : : : : : : B B B " A 9 ( > 6 > @ @ " A " , ' 9 : : : := :; :@ B '- " '- / = ; @ ' '- ' + 5 9 > 6 A C ( >& 9 2 A ? ? ? ' A A A A A = ; @ : 2 2 " , ' 5 ? > 6 ? ? ? ? ? ? ? '> = ; @ : B . ? ?? ? ? ' ' ' 5 9 ' ' & & 6 2 > > 9 ' 2 A = ; A A D > > > > " , '< 0 '#0. 5 > ; @ @ @ : : : B B B = '- $ "#0 - 504 - #$ # E F1 $ = 9 = = ; ' ; ; ;= @ , @ @ : B # 5 > > (6 + ! ! , > > > > >6 ' 2 2 ; 1$ $ ! '- 1 " '- '- " '= == =@ =@ =: =B =B @@ B= ? ? B 9 < ? .$ ,$ # #$ 03 " , '< 0 '#0. = ; @ ;B B@ B@ BB '- $ "#0 - 504 - #$ 10""$ " 1 , '< 0 '#0. .$ ,$ # #$ 03 '- 1$ $ ! 1 " '- '- " '- @ '- 504 $ "#0 - - !" .$ ,$ # #$ 03 # !$# 1$ $ ! 1 " %&" ' # '- " '- " " ' ( & ' > ,9 2 6 > < 1 >& < > ( & − − − 7 < 6 7 > 7 > 6 , 6 < 2 2 & ( & 6 9 > ,9 9 6 < 6 G < I , < H > < 6 + A ? > 9 9 , 5 J & 9 . 9 6 & 9 > < &6 < , > 6 / ! / ! & 9 9 A 6 "$ ' 6 9 A9 >& < & & 6 *9 ??:)< % < I 6 D 6 9 6 ( & 69 6 6 + K + , 6 ;<;@ !-L < 6 > "$ ' J 6 # 6 + 9 2 < 5 >& < A 2 M ( & *9 > > 6 K 2 ? 2 '9 6 & & > 6 > 2 6 9 H > 9 > K 6 > > > A9 & # 9 > , 2 6 5 2 < 9 9 '9 A % ,9 ( ( & A 6 , "$ ' 2 2 < N < : " , '< 0 '#0. ) 9 6 < < 9 G 6 < '- 504 $ "#0 - - N# .$ ,$ # #$ 03 1$ $ ! 1 " '- < , ( & 2 2 9 1 6 > > < > A + > 2 > 6 6 (2 < 2 6 # & > &6 9 > & & 9 > < 9 9 '9 '- " < 6 > ( & , +K 6 > 6 + 6 6 6 , > ( & K ( 6 < A 2 < > > 9 & + , > 5 O < 2 9 > 2 6 9 6 9 * > 9 > % )< 9 6 6 ( 2 K % 9 6 ) < J 9 , ( % 5 % 9 +1" ) 9 + 6 + > 9 < 9 )< 9 > 9 9 6 2& > 9 % M ( & & 9 6 K < 6 ) ' 6 '9 6 9& , 6 * 9 2 6 A > P *92 > 6 2 6 > 2 > > P * 69 9 Q 6 ( & % 9> 9 6 6 . > ( & >6 9> 9 < I 9 % G 9 H) , < > N)< 6 $ ( >6 6 % 9 > > D 2 " > 6 9 9 9> < 6 )< , '< 0 '#0. + 6 9 + + A B '- $ "#0 - 504 - .$ ,$ # #$ 03 ' 1$ $ ! 
> ( 6 < 6 2 6 > 1 " '- > $ & 6 2 > '- " < < O 6 9 *9 < 6 A 2 6 < 6 2 < $ 2 " , '< 0 '#0. < A '- 504 $ "#0 - ( - ! # 1 " '- '- " & !"& & 504 '> > : ?B / D 5 < 2 1 3 " / A " " ! 6 ??@+ ??? " 6 6 6 9 $# 8' " 5 , ' : :; ) $& #>& 1 9 $ 3" ??? S " 1 A # 1 * 5 ??@ ?? R ??@ ?? , ' , 'R' ' (( 1 1 . ' < 6 < # 3 6 1"7/#( T ' > 6 + 5 , 2 ;<@ !-L #>& !> U , V $ 3 * 6 '$ # 3 ( ' !0 # , 1< , . # ,< > 431$ -< > 1 $ "< 1 ' "# , $ 3< * 6 '$ # 3 < !> U , V $ 3 # & > / A ( > > O 9 6 9 $ 3" O 9 9 $ 3" " ' (* 1 1 1$ $ ! "!% !$ ' & ( 3 .$ ,$ # #$ 03 & "! ??? @ "( & -( # + % $ @ > > 5 > " 7> (, # !$# . &" ' # > , '< " $ # +#', " 6 − − > : % − − − < = = )< < < ; & 2 < − & 2 < − ; 9 & 2 < − 6 " , '< 0 '#0. & < ? '- $ "#0 - 504 − − − .$ ,$ # #$ 03 1$ $ ! & > > A ? − 3#, : + − − − = ; : 1 " '- '- " 2< >< > < B >& ) + A ( % A 3 # 2 : >& 5 B< / 9 9 − − A 2 92 9 5 − ,$ 03 3# 9 / + A " 9 # − − − > 6 5 > "1 / ??? = − − − − ; A = 3 # 9 3#, > ( 6 A 6 ! > & $ "$ " % ) O $ ' : & 9 5 M ' 2 " % − ' % :+ − S $ 3" − & B + =& % ??:+ ?? ) '> ' " ' ?) ( " ' " 1 6 − (% /' # 21 # 2 , S 5 S $ 3" " "' # =& '3 " : B@ % 2+$ # ) % ?? ) S1$ % ) ?? ( ' 1 . $ $ / !0/$ & # 1 2 3 ! (44-5 % >7 ) − " 9 6 6 %=>7 ) − 6 6 %:>7 ) − 6 > 6 %:>7 ) − " %:>7 ) % ;>7 ) " , '< 0 '#0. 6 9 $ 3" 9 $ 3" ??B − & ) '- 504 $ "#0 - − − 6 6 .$ ,$ # #$ 03 1$ $ ! 1 " '- '- " 6 %= >7 ) 6 %; >7 ) > A − 2& . $ /$ & ' ' # 2 A % ( >& = ) # 1 %;<@>7 ) − ' % ;>7 ) − 1" ' $ − , = " & 3 > 9 > % < % ) ! (44-5 > > & 2 A ) "1 6 ) $& % /' # '> A 2 ( & > / ; " ! 3 ! (4465 − , %;<@>7 ) − ' " ' %=>7 ) − / + - /$ & % 2 6 % ;B>) − 5 # 1% # ' 3 % > % : $& / 5 >) R 2 A − − − − # , > 6 + %B >) 6 A 6 A ' 6 ' % >) R %; >) + % @>) R A " − 9 1 " , '< 0 '#0. 9 $ 3" '- $ "#0 - 504 - * $ # " " *9 .$ ,$ # #$ 03 7! &" ' # K < 96 " 96 "' , '7$ # " #> ' 6 C %" * > > > < ' 6 1$ $ ! 
> 1 " '- '- " 0 > 5 6 > $ # < 6 ) ,$ '0# > %" C R ) 2 > > > − − − − > ( > < > 9 > % 2 )< − − # 6 < < < 6 − + > < > > < /$ 5 > − > + > & < − < − + > % 6 "( 1 2 , "& , 1 > > )< > 5, $ &" ' # " " / ' + 1 # >& 3 , #'-$ 1*0 >& > > 1 >& ' + , ? W " #> * 6 ,$ '0# / , / 4 '> > 504 < W" , ' , '< 0 '#0. 6 W @ W = W '- $ "#0 - 1 504 B ' + 1 # 1 ' + , 1 : ' + 1 # 1 ' + 1 @ ' + 1 # 1 ' + =+ B .$ ,$ # #$ 03 1$ $ ! 1 " >& 5 > 3 E$ 3 >& 0 B " "! &% ) &$ 8 9 + " +.! &;% 8 8 +8& & >& E " 1 : W '> > 504 ; W @ W 3# " <@ W" , ' >& $ & $$! "& >& >& 0 " # '- '- " : &$ "&% % : + & ! , : % & & * 6 '> " ." % % ,$ '0# > 504 !% < & W W : /$ & >& 0 , U !01 1 >& @ " "! " +.! &;% # # ./ 1 # & " 7! ;!# ! % & " +.! & #" & % : /$ % %. >& * 6 ,$ '0# @W / 30 E , @W '> > 504 W >& * + > 1 , - 4 B " , '< 6 1 >& 0 = # % & <) $ # # ./ " +.! &;% = %"& % & & ." % % * ( 5 > ! $ 3< ! ,03'-< * +/ D - , , 4< * 6 ,$ '0#< * +, -$ ,$ < . ! 3 1 >& * 6 ,$ '0# :@W ' + * +/ D - , 1 @W '> > 504 = W 5 # & > A # > ' "1 / 9@< > < " & >& $ S ( & ' , %' 9 6 9 1! ,S ' , 6 2 % & & ' + " , '< 0 '#0. 1 1< ) R = '- $ "#0 - + 504 : - < * 1 # " '- '- " & 1 ,0 5 9 $ 3" " +.! & # &! ! B : &$ 7! !" ) & ! " 0 3 /T1 # 9 #&% & . #$ A > # & : ' + E 1 01 $ ! 9 . $ % %. A 9 & & X (< " " (< T < !" & &$# & 5 < " B ' & ,* "( + < + 5 : " # C * T > - '- 5 3$ 5 9 & ;!% % & 9 % %. , '< 0 '#0. 9 & ( Z +" $ 1 9 / " " : /$ ' &. : ? B < Z ' Z > % %. V + # " & ! % : &. ! % & & 9 $ $ "1 / 9B< 1 7> > " , ' % @ ; 1 " >& 5 X Y , # : 9 $ 3" < 6 >& 3 $ & #$ & % & # %. ! $;& 7!# ! " "! & &% % /% * ( 2 .$ "$ F< 0 1 /0 ! "< '> > 504< 5 > $ 31 E0$ 3 < * +, -$ ,$ < * > 03"$ 3< 0 " 3#$ 4"< 4 "0 , 1 >& 0 1 /0 ! " @ W ' + 4 "0 , @W ' + '> > 504 ;@W 5 # & > A * " R 1 5 > 1" "( : & >& $ > > 9 $ # 7$ 3" > + > & 6 B+ @+ 1$ $ ! ' + " 1 # ,( :+ .$ ,$ # #$ 03 >< '- $ "#0 - 504 5 X Y , # " 9 9 # # %. & %"& % & <+ $ 2 ! 4" 9 %"& & + A " # 5 " # ', E 1 ??B = 5 " # = ! " # $ " # '- '- " $ 3" " % ." % % $!% < 9 ) $ " 5 / $ !" ! % 60 $& # % % : &. ! A% ! % !$# 7! ! 6( $ ! # ! 
"& &% + + ! < +%3 D 2 " ?? 5 / " # 1 " $ /"$ '< < #% ;!# ! % " # ! #" ! & ', [ $ 3 9 9 ! @ " " !$# 7! C &' ". + ; ! ! '0"[ 1 9 $ 3" &$ &. +#7! % % ! %& ' " # 1$ $ ! $ 9 #" " ?? 9 $ .$ ,$ # #$ 03 $ 3" #&% & D 1 # &%$ &$ " &. $ !% % % # 7! ! 5 ) B ! #$ " ! F &% < & &$& ! E@ / . 3 9 9 & $ #% #" $$! "& , # 9 " % B ' " " $ 3" + % !0 * /$ ' # ! : /$ , /$ .# # & Y5 - 9 $ 5 33 1 ""< $ & " $$! "& " $ 3" & " "! ) "< . : 0 %"& : /$ #% #" $$! "& %$ % & & !$# 7! ! " $ , '< 0 '#0. &% . & ) & &!0 $ @ '- $ "#0 - 504 - ,, A > 9 B 9 F , .0 1 " ! % / # @ 1 # - % "% % : " 9 ! & 1 " '- '- " 7! , ' & &% % . ." % < # &;% !$ $ $ & : /$ - & ." % & " " ! % & & & " & , ' 5 - "9 ' 3 > ( 3# ? X # (%3# )< " " 1 B C ' 9 RB ' #> ! , U !01 > E 2 -( !% < & ""! % " : 1$ $ ! " : /$ $& "& . B .$ ,$ # #$ 03 " 1 9 ' & Z## 9 2 1 %# & . 9 1 9 "$ T 55, : 92 , '> < +5 %# )< Z$ ## 3 Z'05< @ B @ * + > 1 , - 4 9 . < 9 ' @A R @ ' 6 ( > 6 ' % ! ,03'@ &; % # " % %" ' 6 <# ! ! !$# #"&% '! # 3 1 , ? A ' + : * + - 3 31 V '> > 504 " , ' , 104, # (' 4X >" 3 1 T " , '< 0 '#0. 1 %$ ( ) # X( %* ) ')< '- $ "#0 - 504 - 6( $; "$ # &% & ' C ' SB $ Z A $ '## 9 6* 6, ;A : #> ( * " " 5 # < = $ ( < " &% " & 1 > !: # > Z A ' < 3 C X : ' < > < / >& / " 5 # 1 " 5 # < / " + # . ( #- ,0# B : 9 $ 3" 6 # %. " &! < ' &! ! " "! " +.! &;% : &$ 7! $ 5 > ! $ 3< ' "" < '> > 504< / 30 E ,< 1 6 -0 V #< , #0 " ,, 0 : # : 9 $ 3" 6 # B : / " < "( < B+ & * : > '- " 0 < 0 1 / 1'< " < :+= "1 "( ' 0 < 5( <! & < + " " < " '> L " C 1 / # > " < ;+ @ " C S: '- " + # ' # 1 " ' " C Z > " ' '05 " "1 9@ 5 7! # "1 9 C ' 1$ $ ! ' $ & ' " ' 3 C X += \ B = ' 1 .$ ,$ # #$ 03 $ & % & ! % & " : ) ( * +, 1 T 4" < 1 '> > 504< 5 > #0 " 0 F , @ #% & # # ./ = 9 6 & &"# & %"& 1% & , '< 0 '#0. B B $& %"& ! " +.! &;% 6 -0 V #< 3 > * ,$ 3< $ 31 E0$ 3 < " 33< , : 6 " #% & @ $$& : /$ . B ! & " , % & <) $ ." % % B '- $ "#0 - 504 * : ( '> 31 504< / 5 X Y " 5 # * ( < * Y '- 5 30 E ,< * : & ' C ' Z9B $ '## 9: ' 1 $ ! & "$ "9@ G & "% &% '! 
< < " M ' < " " M 5 ' ' 1 "( ' # > / #> < ' < < < 1 $ ( < < "( < B+ "( Z ' < ! & < B+? ' ' 0 X ' < 0 < " < = B Z < </ < +; B $ ' $ # > / #> ( < "( < ;+ : Z X > " "( < +; @ Z A R = < < >& " # > , 9@< ' 9@ ] 4< '> > 504< * > % " )< ?? '0"[ _' 6 > 6 " + # < < =+@ $ 0 " C 1 / 9 _5 1( * + > 1 , - 4< "1 / # > ' 2 " > 9 $ 3" $ < @+ > $ < = ' 1 $ " $ 3 C \ 9B < ! ( !0!3$ #< '> -$ ,$ &% / $ "Z '9 '- " & #$ & % & # %. ! $;& 7!# ! " "! & &% % /% 2 .$ "$ F< 0 1 /0 ! "< '> > 504 5 > $ 31 E0$ 3 < * +, -$ ,$ < * > 03"$ 3< 0 " 3#$ 4"< 4 "0 , 1 " $# ' $# ' < $ '## 9 '- $ ' 8 1"19 1 " 6 & + # " $ 5 '9 +, 1$ $ ! , # 6'! 1" $# $# .$ ,$ # #$ 03 . R5 " _ , '< 0 '#0. C _ 6 ,$ '0# @ ^ < '- $ "#0 - 504 - H > H " 5 ,$ 03 >1 % % &; & & ( E / > # ,$ 03 3 ( E ,$ 03 9 '- '- " (%3# ) / > # 5 " " 1 9 > 5 X 3 E$ 3 / 1 5 1 " &% A 3 1$ $ ! "% % &; & + 9 / .$ ,$ # #$ 03 B " (%3# ) " 1 9> / B+ H( > ! A 3 Z'05`` # 2 92 3 C X 7 2 Z 92 $ '# A '05 `` B& + B a+ > $ $ = (% "' + + A :+ = ) (R > $ "# :& B = R ; " ( , ' + 3 Z'05 3 C X @+ 2 Z 92 B $ "# '05 :& "' a + + A ;+ , ( $ "# @ " A C C " ' , A ( B& + ? " " , ' , '< 0 '#0. ? '- 504 $ "#0 - - H* 50 '05 > & .$ ,$ # #$ 03 1$ $ ! 1 " '- '- " &!0 5 0 + '0 51 "0'7"0 ' 3#, B+ ? B A $ 1 05 O $ $ 8 2 6 2 3 # :+ B A =" O 6 3 #7 3#, =+ @ > $ 7 8 2 "( & @ H, ,54 # 5 > 5 > #. &!0 + b . > + A 2 ?% O HA 1! ) > 1 $ = " + 9 > > 1 5 2 2 ' H6 8 , ' L !; %" " 7 + +1" 1'71 ' !"5< 1! < 81 % # + 5#" /11< . # > ) = A 5= 5= % 2+5 < T( ;+ > " +' @ A / 2Z'15 >M 1" ': & % < ': ??? + " , '< 0 '#0. 7 =< ':; : !3 ) 5#" /11 ) '- 504 $ "#0 - - I !" I cd " '> & 1< '> 1 _< @A > 504< * 6 > # > 504< 5 X Y ( "( #< * +, $ -$ ,$ _< '> # > " '- '- " B &% , !-0 Z 4 ,< '> / # 0 "1 6 _< ," E$ " $ A cd , U !01 > 6 c =d '> 1 * '! 1$ ! ! ,< + 1 " !' &. 0 F ,< ! (!0!3$ #< * + > > 504< _/ 511 ' # 2 X" > ( ( 511< $ ( <" :< + = # 1$ $ ! +7! & I( cd " 1 5 " .$ ,$ # #$ 03 > 504< 4 0 ' ,$ '0#< _ ,0 #< _ 3 !/%/ ) / 2 5 5 <" , #< _- >+, 5 + R" +5 ' > 1 ' 1 1 +/ " C _< > " $ = c ;d E 01 $ ! 
+ * Z V< '> > 504< * 6 C _< Z (Z ' $ ' <" C < B< ;= R @: <" "( c @d 5 X Y , #< / . 3< * 1 /0 ! "< 4 "0 ,< _ "( 5 !+; " 0 " R T C > eE @@7 " 7 :7:;=:? c :d " ' > [ L < , #< '> "1 B< B< A I* ' cd '> 3 ' > 504< 6 9 " 3 , cd '> 3 ' > 504< 6 9 " 3 c =d '> > 504< ,$ '0#< _$ > 5 1 '0 < _1 D " 6 ' '0 < _1 D " 6 ' +5 . "( _< #< _ / / /1 1 9 / _< f/ B ?@< , " B ;;? 1 9 / _< f/ $ "< D " ' +/ D 3 V 3< '> > 504< 0 ( 5 5#" ' , X_< * 1 5 > 1" "( < :% :)< $ 1 :;=:?< = + > 504< >1 < , Z ' ;< ;;+@ T0 3#0 X P #> + , '< 0 '#0. 2 B ?: .$ "$ F< _ 6 _< f = ? ??? @ + '- $ "#0 - 504 - I, cd '! > - '- 5 3$ <* $ _< 6 < Icd + # cd " # 5 +" $ 5 'SB< ' 6 ! ,< '> > >& < ) ,$ '0#< 1 L </ % ! c ;d '> "Z 1 C =% > 504< > _< $ cd " # * 6 '> cd . $ c =d * ', # " 5 2 / 2 _< $ > @< .$ "$ F< 5 X Y , #< _-Z (" C Z X > <. < " < \ & 5 - "-< '> > > 0 ( &% &' " &" > 504< L "1 > $ E$ 301< ,$ '0#< '> > 504< _#> _" _< "$ < #> FF$ F! < '> % " )< #$ 3< 6 > 504< * 3 /T- < * ,$ '0#< _ > C 3 /T- < '> ,$ '0#< ] / ! 3 ' > 6 < Z" S < 1 ,0 5 < " < =+ ,$ '0#< _#C " +! ( 2 $ 5 > 504< ^ > ^< ""'$ ' @< R : ' ' ) > " '- " ' # ( ,$ '0#< _ 6 ;=g;B< T " ! $ < '> , '< 0 '#0. > > 504< / ^< 3 Z' "< E0*#$ " T< > "0 , 1< '> E$ 1 ,< * +'> > , , 33< _ (_< 5 # Z X > 1 # ' 1< '> '- " > 5 - "-< E$ 301< '> > 504< * 6 2 ( / . X > " <_ ' C ' S < @+ B <" % > ' 0 Z 3 C X '05 ) T0 1 $ <1 '- 5 <* 1 5 > c :d , U !01 > Z X > 1 " > 504< _ 3 C " _< "$ '0SB< L < '0 ) 6 + # " " $ # %"! 1 ,0 5 < * O 5 5$ 14< , 4< * > <5 ' _ , C' ' ' c Bd > 504< * 1 B% 6 &% ,$ '0#< '> # $ !$ $ < '> "1 ( " +" > - '- 5 3$ <* _ ' > 504< _, . B & ! ,< > ,$ '0#< _ ! > L _< "$ < #> FF$ F! " < '> % " )< c ;d c @d ,$ '0#< '> @ @?< T0 3#0 $ "< 0 +#> + 1( Z'03S =< " C Z '03 ) I6 1$ $ ! &% " > - '- 5 3$ <* 6 " ' B% " B c =d * & .$ ,$ # #$ 03 + @A ,$ '0#< " > 504< * Y 5 # ' + <5 >< ; 5 > > _< @ >T >< > < > 504< * 6 ( _< @ > '- $ "#0 - T 504 > Z < cd " '> X > " C < Z" S < 0 F ,< ! (!0!3$ #< * > 504< _"( , 1 * +, + </ < "( ) c ?d E 01 $ ! V< '> + $ 5 'S:< X< ' ) .$ ,$ # #$ 03 1$ $ ! 
=@+ ; < T + > 1$ ! #< * +, C > 5, + < : %$ "( > 504< * 6 : %$ ,$ '0#< _ C X > ( + $ 5 01 $ ! +" < Z" S:< c d'> < > 504< 2 ' ' BB@+BB? %$ c@d 5 X Y , #< 4 T0 3#0 $ "< '> #$':2 "$ '0 F$ " $ , 5 3 <* > 504< _"( 1 +# " '0 c:d $ "< '> > Z < + cBd T0 3#0 _< " T > >< T0 3#0 _ 5#"< !"5< < < $ "< '> ' " 1! . # ?:+ %$ c d'> _1 > 504< $ /$ > " C %$ "( c?d T0 3#0 S:< + 0 +Z _< . <" @< ) . 3< * +/ D 3 V 3< '> > 504< 0 T / 1 0 _< "$ '0S@< ( %# 6 ()< " @ '0 ) c;d* . T 3"$ T< 1 , 3V< #> Z$ .T < " # / V0,,$ < 2 .$ "$ F< '> > 504< - ,$ 5$ '< ' 10,Z$ 3< _5 ' # _< #> T > Z X > @?+:=< T > >< < B+ ; ( T C ' V< 1 !$ $ < _0 " C +1 + :< T >< .$ "$ F< " > [ , #< _ "( _< $ 5 'S@< . $ 5 ' c=d 5 X Y , #< / 1 /0 ! "< _"( 1 2 2 > % " $ :< > _< c d '> 1 ; >T 5 '- " -$ ,$ >_< $ "9 $ ,$ '0#< _/ ' "1 "( _< ' ' ' ,$ '0#< E 0 " C '- >< c d, U !01 1< '> > 504< * 6 5 ' 5 * :< 5(X <! & < + @% Z 3 C X '05 ) > 504< * 6 ' > Z X > < : 1 " +/ D 2 2 5 ' ) 3 V 3< T 1 " 1" > _< % < =+: > 504< _ X > " C " C < Z" 9 < > 504< , 5. 1< , '0 ( " C . 5 + # >_< E#' / S < ' (< " < + E > # > (' ) T0 3#0 $ "< , 5. !"5 1! 5 _< $ "#S <# > <$ # ) $ "< '> A > ! ,#'- E< > T0 3#0 $ "< 5 > " C < Z" 9;< > 504< , , '< 0 '#0. 5. 1< 71 < += , '0 1< , '0 ?+ ;< < < " > <_ = '- $ "#0 - 504 " C $ "" S $ c d'> 1 E > <T "( , < c d'> " ' (< "( ! & cd , U !01 1< 0 1 / # > ' + > ' . + > c @d " X , # > 1< "( / V. < _' ' <* ' 9:< 0 01 $ ! + ( :< 5(X 6 " 6 . & , ; 0 F ,< * 1$ 3< * + "5 E > < " < . ( C " _< "1 # > 504< , '< 0 '#0. + $ B< , 4< " C SB< +@ A " #< '> " 6 ' + ' " B ,$ '0#< '> > 504< * # " SB< +@ A > 504< '> C _< "1 : / ,$ '0#< _ > ( < 5(X > " ' <! & + > 1$ ! #< 3 . ,# <* !0 . 1< '> > 504< _ 5, / "1 "( _< "1 / # > ' @ " < _/ <, 4< _' "1 "( B S:< ;+ A _< < ;+ A cd * + > 1 , - 4< , 4< '> > 504< * 6 _5 1( "1 / # > ' 9@< > < " < @ c ?d " !0 , '0 " S &% &' " &" % ." % % C ,$ '0#< '> 1( ' Z c Bd '> > 504< _ Z. R T ( 3 Z'05 Z X > > $ "# 5 =!_< 2 > 4< '> > 504< _1 "1 9B< 1 < " < @+? > 504< * _< $ "# 5 <! & '- " -( " @ !-L _< $ """# S ? 
<" >$ ) % & & ,$ '0#< 4 ,0 ( "1 % " )< V< '> _< '- @! %$ ) 5. 1< _ > > Z X > > 504< < " < @+? , > ' 1 # > 1 , - 4<* 6 / ! _< $ "# 5 Z <- B< . c :d E L > Z 3!< '> 1( ( 9B< 1 3 /T- < " ! $ <* 1 , - 4<_ _< $ "# 5 Z ' <- > 5. 1 " ) " + # % $& 1 , - 4< _< "1 c ;d * _ $ "< , " C 1$ $ ! ! ; @+; < > 504< !> U , V $ 3< * 6 '$ # 3 <_ 1"7/T # , X 6 " < +; ?? < @B +@B@ %$ " +" # > 6 IG c =d T0 3#0 / % > T0 3#0 $ "< '> > 504< , ( " C "( _< / T :?+B;< T > >< < ?+= T ( cd * > # # < 5 ( < =+ : \ " > 504< $ / 5#" # < " < @+ A "( c d .$ ,$ # #$ 03 : ,$ '0#< _< "1 > ' + C X $ 5 9@< '- 504 $ "#0 - - c d'> > 504< >1 < " > T0 3#0 $ "< >1 < _ "1 , C - >1 _< "1 / ; [ C .$ ,$ # #$ 03 , #< 2 $ # > ' 1$ $ ! 1 " '- '- " .$ "$ F< >1 < ' "( < > 2< " < c d '> > 504< 5 X Y , #< " 0 F ,< * + > 1$ ! #< ! ( !0!3$ #< > 1 "/ 4< 3 . ,# <* + !0 . 1< 4 1 3 /< _ 5, Z " "( _< "1 / # > ' < > 2< " < ; c d5 X . 03V ,< L" $ 1 ,< * . T 3"$ T< 1 .$ "$ F< '> > 504< T0 3#0 $ "< 5 Z ,# < " T $ , TT #< , 5 < #> . ! 6 _< $ "# 5 Z " < ,( < A ; c=d T0 3#0 " $ "< '> > 504< ( + >+ 3 C X Z C =% c;d'> " 3 > 504< !> U , V $ 3< * " 1"7/T </ < =+ B A ?? < >"( ) IH cd / ?? % 30 E ,< '> 2 + Z X > '0"[ < '> ' / cd '> " < :+ ??? % ;! # <* + < _ 5, < 1 , . ,# _< "1 / / ) # <* < _E 3 " c =d ' 1 " > 504< !> U , V $ 3< * 1"7/T < </ < +; ) 1 , 3 . ,# ZZ / cd & # '> E$ 0#< _$ _< Z5 S ? < ) 6 '$ # 3 <_ > # '> ??B %" # &. .7! (+. / ! ( < ;+ @ "$ # -( _< * + !$ & !0 . 1< ! (!0!3$ #< '> C C ; %Z Z >/ ) + !0 . 1< ! (!0!3$ #< '> "( ' > ( ( > <5 ( < < ; %" > 6 / #C ' > _< $ "S ? < &% &' "&" 1 > " < $ &%1 "& &"/ & <! & ) > 504< !> U , V $ 3< * 6 '$ # 3 < _5 1 # > 6 ; @ !-L _< *35S ??< L& * 3 5 + ) I 4 cd 3 <_ > # ?B % > 504< 0 '5 ( 5 + # %"! .$ "$ F <_ 2 _< 3Z$ < 5(X $ > '$ # < ./ II cd 6 2 , 3V< 2 - ,$ 5$ '< " < _/ ' "1 / > < " " &!0 > 504< _ > 504< 1 C 1 < _" C + _ , '< 0 '#0. @ '- $ "#0 - 504 - I cd '> " # #>& #$ " > 504< _' -( 1"7/> 6 +5 1 <$ 3" cd '> 2 & c ?d 5 . 
X c d, ' 6 > ( B "$ 1 5 > 1 2 > > 1 -Z 1 B< _1 1 B R _< B C +$ $R 5@ < _$ Z @ ( 7 _< A B > -Z <" 2 <$ $ $ 7Z @71@ 7 : > > 1 > T (' : 1 C X > $ _< A ; ?< _1 ; ? R / : "Z 5 -Z7"Z 1 : _< / > " B< . C 7 C > \ : ( _< \ : 3 Z'05 Z 1 @< _1 1 @ R L C X_< @ c;d, : > _< <1 +$ $R 1= _$ (" > c=d , $ < " > +$ $R 1@ ;< _# X _< A +$ $R 1@ < _ 6 _< $ "#+ @+ BB ;7 c d, > R" +$ $R 1@ < _1 _< c d3 Z'05 _< c@d , < _1 ; B 3 Z'05 Z 1 > / > +$ $R 1@ =< _ "Z 5 ??@ B ; cd , < :A 1 _< c ;d 3 Z'05 _< c Bd , / 2 ; @ !-L _< ' <$ 3" 6 ' ; ;< _1 ; ; + " c :d , 6 2 2 S _ c =d 3 Z'05 B C X '- " ??? 5 1 ( > 3 1 ' A +$ $R 1= = _ c @d , '- ;! &!0 #%' &;% "% % &; & + ! # +$ $R 1B < Z> . cd , # , < 1 " #! 4 cd , 5 " 1$ $ ! " S "( & #( T > 504< _, "( & 6 _< 5 4 .$ ,$ # #$ 03 + ( 3 Z'05 Z _< 1 ;< _1 1 ; R @ / 2 1 3 Z'05 Z > 1 _< 1 =< _1 1 = R @ ! / " , '< 0 '#0. 2 C X 1" '- $ "#0 - 504 - 4( > $ 1 05 cd , $ 1 05 < _' > c =d , A $ 1 05 : <_ A 3 # =" R _ c @d A 3 # =" R _1 c :d A 3 # =" R _5 c Bd A 3 # =" R _ 4* cd '> 81< c =d c ;d '> #> 9 6 _ & _ _ > !; " " %" " ! ;! _< 5 , > 504< # 81< </ <@A # > ( > </ '0 < '> <5 > ( 81 > '0 <_ > 504< _ $ 5 5 > </ '0 <_ 81 & ,$ '0#< '> 2 6 3f ' 5 +/ # 81< < ? R > " # </ > _< , 81< < _< 5 , > $ 5 R : _< S > 504< _ 81 c Bd _< B 9 > 504< _/ > 4, '- " S _ > 504< c @d '> c :d * > & ! ( > '- _< T0 3#0 $ "< R $ $ > # 81< </ < B / 1 " =< _" c ;d 81 1$ $ ! "% % &; & + +& J& cd , cd '> .$ ,$ # #$ 03 & " 5 $ + _< > R ( > 5 < ? $ 81 _< " > 504< _, +h ' _< ' 9 := ?B< > 1 cd S >& R c ?d S >& R " # f?@ =@ ??:< ??:< , '< 0 '#0. > 1 > 1 # f?@ =@ # f?@ =@ B '- $ "#0 - 504 - " , '< 0 '#0. .$ ,$ # #$ 03 1$ $ ! 1 " '- '- " '- $ "#0 - 504 - #$ # " E F1 , '< 0 '#0. .$ ,$ # #$ 03 1$ $ ! '- 1 " '- '- " '- ? '- = $ "#0 - 504 - " , '< 0 '#0. .$ ,$ # #$ 03 1$ $ ! 1 " '- '- " '- $ "#0 - 504 - .$ ,$ # #$ 03 1$ $ ! 1 " '- '- " ! " , '< 0 '#0. = '- $ "#0 - 504 - $ .$ ,$ # #$ 03 1$ $ ! 
1 " 2 < 6 9 < 9 ' & < 6 6 ( 2 < A # 9 < 4 , < > <A 9 *9 & A 5 > T0 3#0 2 * 6 < 6 < A ,$ '0# > " 6 9 9 > > $ A & ( 9 L!> U , V $ 3 6 * 6 2 > > 9+ + - ,,0'0 6 9 & O > 2 9 < A > > ; 9 6 , $ " >& > < > < < A $ 9 9 2 * < A 9 > 9 > A 96 ,9 < 2 A9 > < 9 *9 \ > > 9 6 & ( 9 2 26 A = 6 < 6 , ' < A 9 @ 9 2 < < 9 > 6 " '- " " $ $ ( >& > '- " , '< 0 '#0. 2 > & = "' 96 9 K * 6 + 6 " 9 < 6 '- $ "#0 - 504 - ( G c =d c ;dH< 6 6 6 > >< > > 2 9 6 A9 > & < > > < 1 " '- '- " ' 9 < , 2 % 1895 1901 1947 1948 1970s 1980s 1991 1995 1995 1996 1996 1997 1997 1998 1999 1999 2000 2001 2002 2003 2003 2004 2004 2005 2005 2006 2007 2008 < cd cd > > 2 6 6 6 6 9 > 9 + < A9 / 6 < ) .! 1$ $ ! !" ' , 2 .$ ,$ # #$ 03 K < A9 > ) & 6 9 ( K 6 6 6 - MARCONI transmet des signaux radio sur 2,4 km. - Première liaison radio transatlantique par MARCONI. - Invention du transistor par BRATTAIN, BARDEEN, SOCKLEY. - Théorie de l’information par SHANNON. Fondamentaux du numérique. - Programme ICNIA de l’US Air Force : radio embarquée sur la bande 30-1600 MHz (abouti en 92). - Premiers systèmes radios à µ-proc. pour contrôler les circuits analogiques de réglage de fréquence. - Introduction des premières radio VHF multi-mode dans l’armée américaine - Premiers systèmes radios à base de DSP pour le contrôle du modem en temps-réel. - Amélioration des convertisseurs analogiques numériques. - Tactical Anti-Jam Programmable Signal Processor (TAJPSP) pour l’opération simultanée de formes d’onde utilisant une approche modulaire. Ancêtre du programme SPEAKeasy. - Première utilisation de l’expression « software radio » par Joe MITOLA. - Premier IEEE Communication Magazine Special Issue on Software Radio. - Première démonstration – SPEAKeasy US DoD. - sujet de thèse orienté conception d’ASIC pour le canal de diffusion troposphérique - Création du MMITS Forum qui deviendra le SDR Forum en 1999. - Premier groupe d’étude sur la Radio Logicielle au Japon. 
- Ré-orientation du sujet de thèse vers une implantation dans un FPGA - La Commission Européenne sponsorise un Software Radio Workshop à Bruxelles. - SPEAKeasy II : démonstration d’un système multi-mode multi-bande. - Investigations sur les plates-formes Mercury à base de PowerPC, mais trop tardives par rapport à l’avancement de la thèse - L’US Navy se dote de la Digital Modular Radio (SDR de 2 MHz à 2 GHz). - Initiation du JTRS (Joint Tactical Radio System) par le DoD. - Première utilisation de l’expression « cognitive radio » par Joe MITOLA. - Adoption du modèle de démonstration multi-mode multi-bande conjoint entre la France et l’Allemagne. - Deux numéros IEEE Communications Magazine, un IEEE Personal Communications Magazine, un IEEE Selected Areas in Communications sur la radio logicielle. - début 99 : Création de l’activité Software Radio de Mitsubishi Electric ITE-TCL - mi-99 : acquisition d’une plate-forme Tx/Rx et premiers développements multi-standards - septembre 99 : représentant de Mitsubishi Electric au SDR Forum - démonstrations multi-standard en FI pour la partie modulation de la couche physique : UMTS-FDD, GSM, EDGE, Bluetooth - début d’approche de conception multi-processeur hétérogène de haut-niveau (de type SynDEx) - Premier « Report and Order » de la FCC sur la SDR. - le CRC (Canada) développe la première version open-source en Java du SCA. - deux numéros spéciaux de la revue des « Annales des Télécommunications » - première démonstration temps-réel de reconfiguration par téléchargement (OTAR en FI) d’organes de la couche physique (modulation EDGE – application vidéo) - présentation du système de la démo dans les locaux de VANU Inc. à Boston - projet RNRT A3S : approche de conception de haut-niveau (de type UML) - La FCC définit avec le SDR Forum la Cognitive Radio et la situe par rapport à la Software Radio. - Le projet E²R est initié par la Commission Européenne. 
- Première certification d’un produit commercial SDR de Vanu Inc (USA) - participation au projet E²R - travaux sur SDR et UWB - Le ministère de l’intérieur et des communications du Japon change la loi sur la radio pour permettre l’introduction de produits SDR dans la bande des 5 GHz. - équipe SUPELEC SCEE / IETR - participation à NEWCOM Project D et Department 4 - participation au projet E²R phase II - participation au projet IDROMel - RNRT - participation au projet MOPCOM – RNTL - SUPELEC est l’une des 7 universités internationales à participer au Smart Radio Challenge organisé par le SDR Forum : équipe MENHIR (Modem for ENHanced Interoperable Radio) dont je suis le tuteur. - éditeur du numéro spécial sur la radio intelligente de la revue « Annales des Télécommunications » 7! % & & " % ." % % , '< 0 '#0. % & & % %. K -L K GL == '- 504 $ "#0 - ' - 2 > > .$ ,$ # #$ 03 1$ $ ! 1 " '- '- " >& / K 5 > ! 9 > #>& 5 X Y , # #>& * + > 1 , - 4 + #>& " # ! , #>& , U !01 1 + #>& > - '- 5 3$ ) .! (< $ , > &'&!0 > / 9 "& & < 2& , & > 2 9 >& : 2& E 5 X Y > >& < 01 $ ! 2 " V6 6 9 > # , # 6 A9 5 >& < + @ < >& > > * + > 2 6 J :< 1 , - 4 > K >& 2 1 > 1 ! , & 9 > , < 6 2 & >& 2 , U !01 9 > 69 9 > 1 6 A9 * 2 + > ' 1 , - 4 ( 9 < 9 ( & < 69 6 9 > >& 9 > I =; # " , '< 0 '#0. 6 > - '- 5 3$ 9 96 6 2 > < '- 504 $ "#0 - *9 6 > .$ ,$ # #$ 03 1$ $ ! 1 " '- '- " > < 2 < 1 < A > > 6 '9 < 9 A9 > 2 O O 6 A 6 69 L >< 96 % 6 > K < < A9 2 2 6 < 6 < 96 < ( ( & ) # 2 2 9 A 9 A 6 1 9 < ( * & %& * " & 9 6 % ." % % 6 69 9 , 92 6 K < 9 6 6 6 69 K ,9 6 > 6 9 69 ( ( < < < " < "$ ' # 2 > > K 9 > > % " )< 9 " 6 " 1 6 < A9 < < 2 6 A ,9 < 2 9 6 6 < A ) " A cd cd c @d $ & 2 9 9 6 L 6 A9 2 > 2 9 > 2 ( 2 2 6 > 6 A O ( & 9 > & 2 6 > 2 5 & > < 9 ( > < 9 c Bd' 1" < / ! < 9 $ 9 6 c :d c ( dc (=d , c (d , > 6 % 9 9 2 6 9 ( & <A 6 6 > 6 < > " % , '< 0 '#0. + & < > 5,) c (;d c (@d 3 =@ '- 504 $ "#0 - c d 2 2 2 1 .$ ,$ # #$ 03 1$ $ ! 
+ < A 69 1 " cd 9 6 6 9 < , 6 & 9 , $ ) 0 ) 1 1 2 *# " c d 5 # < & "' "' 9 69 O 9 ) > < " + 9 2 2 "' > c d < A9 "( 6 & 5 > A c (:d < 9 ) 9#"$ c?d 9 A < c ;d 5 > A /'' ' c d < < O "''; c;d B< > > ) 2 2 2 >& 9 $ 6 Z!; " 2 " , '< 0 '#0. 2 < A #E 1 69 =: < % < ? 9#"$c?d c =d < 9 9 < 2 9 Z! 9 c d < c=d A 2 6 < 69 9 & $ 6 6 < 6 ( /'' c d6 , > > 19 A & A 9 % %. ! > 6 6 A , > c:d < "% A A '0 . 2 *( 69 '0 . 2 6 9 9 > '9 6 c=d c;d < c@d E # > !3 cBd / 2 9 1 1 $ 6 9 2 > 9 # 6 < A $ 9 ( > 1 1 % " 1 %" C ' 6 $ 9 % 2 K &6 % "' c ?d > > ,9 cd > < > 6 L2 2 ) 6 2 2 1 '- " *92 (6 ' '- '- 504 $ "#0 - 6 * 9 9 - $ 9 ( .$ ,$ # #$ 03 1$ $ ! 2 & 6 6 2 Z! 2 " ! % & & , & > 5 *9 9 > " 2 , > 6 > > % ." % % 6 A 9 9 > > 2 ' 2 2 ( 6 2 < ( 6 9 9 '9 2 > 2 ( & 6 9 '- " 9 1 9#"$6 [ A ( & < A 6 , # %. % ." % % " " , > L > 2 ( & , , & + & 6 6 6 ! % & ( & 6 < 6 %1" < ! 6 < < i')< >6 , & 9 > + + < K < / ! > 9"$ ' > & < 9 + + + + + 3 2 < + < 6 < + & > < > ( 2 , & < 6 9 > < < & 6 \N ' A 9 9 2 6 A K A9 > 5 X Y + + 9 2 >& '- 9 6 < 9 K , 1 " 2 A 9 K A 9 A ???< 9 > 6 & 9 A c ?d 9 ,9 $ # 7$ 3" , # 9 '9 6 A 5 , > > 6 " , '< 0 '#0. =B '- $ "#0 - 504 - .$ ,$ # #$ 03 1$ $ ! 1 " '- 9 & 2 9A ' 9 9 A 9 9 O > / ! 1 ! ," %$ > 6 ( ' ) 2 % < A 2 & 9 / ! < 9 6 K 6 > 9 > < > 6 9 + + J < 9 + < < 9 92 < 6 9 ' K / ! )< & % < 9 < & & % > (> 9 K c (Bd ' ) 6 6 % & ' 6 & > < &6 > '- " M 6 + ! ) .! * M )% ,9 6 cd c@d "( 1 2 2 ( < > 9 > "( 1 2 & 2 > 6 < 2 K = " " #&% ! % & & J D % ." % % > "( 1 2 < 9 9 + 6 < 9 + + J >6 " , '< 0 '#0. 9 $ 3 $ > < 6 9 > I J + 9 '> 6 !3 < '- 504 $ "#0 - - 5;< .$ ,$ # #$ 03 1$ $ ! 1 " '- >&6 ' 9 9 + + 6 > '- " + 2 < 2 1 6 < < > & 9 6 , > 9 ,9 "$ c ( d > 2 A > + 6 c (?d 6 9 & FIFO TCP FIFO 5 X Y , # K 2 1 c:d Q 6 2 ) " >& 2 9 " > ! % ! Vitec ! $ % & $&. 9 # & > 9 "( 1 2 ;& 9 % 6 X " ) 1" "( 1 2 c ?d < 3 9 6 6 6 5 X Y , # "( 1 2 , > 6 , # 2 > %#$ ':2 6 69 5 > "( 1 2< 5 X Y 9 > 2 J + < 6 9 9 6 ' > 2 " > " 6 6 6 , ' 9 A 9 '> " + >&6 + 6 1 % ." 
% % <+ $ & "( 1 2 9 > ' > "( 1 2 , PCI BUS Pentek & 9 9 PC + DSPs TCP Sundance BIFO Pentek Vitec %"& MPEG-4 Decoder FIFO FPGAs + DSPs SDB Sundance PCI BUS ; UMTS Demodulation FPGAs + DSPs PC ' 9 6 / UMTS Modulation MPEG-4 Coder ,M = 9 2 K ) .! / + < , ' "1 / , 6 " 2 < , '< 0 '#0. E # 96 > " " , ' 2 =? '- 504 $ "#0 - - "( 1 2< =" % > A 2 > > 6 .$ ,$ # #$ 03 6 > 1 " / > 6 A 3 # , K A "( 1 2 "( 1 2 =" 5, & ,9 > 51 3 # ' > > 9 =< A9 =" 5, & & , ( 2 < 6 =" < > c ( d =" % < N)< & '- " > '9 9 "( & ) < 9 '- < 2 > 1$ $ ! * < 5$ #0, & @ K ' 69 < 9 A 9 > 2 + / ! < ( < 9 19Q "( 1 2 <A > 6 A 3#, 5 6 9 < & > ( 51 7 5, A 69 E-1, ( > / ! 5 ,9 9 A 2 > > + 9 $ # 7$ 3" ) 5 X Y : , # % 9 < 6 A & , #< '9 69 >& > ' ' * $ > 2& # 6 > 6 >& 5 X Y " , ' () ' * + % " & A * A , # , % . 3< 6 9 $ # 7$ 3" ; > () ' * " '*' 9 A ' 2 5 X Y , # " & * :- $ > O , '< 0 '#0. 6 >& 5 X Y / 96 Hindawi Publishing Corporation EURASIP Journal on Applied Signal Processing Volume 2006, Article ID 64369, Pages 1–13 DOI 10.1155/ASP/2006/64369 Rapid Prototyping for Heterogeneous Multicomponent Systems: An MPEG-4 Stream over a UMTS Communication Link M. Raulet,1, 2 F. Urban,1 J.-F. Nezan,1 C. Moy,3 O. Deforges,1 and Y. Sorel4 1 IETR/Image Group Lab, UMR CNRS 6164/INSA, 20, Avenue des Buttes de Coësmes, 35043 Rennes, France Electric ITE, Telecommunication Lab, 1 Allée de Beaulieu, 35 000 Rennes, France 3 IETR/Automatic & Communication Lab, UMR CNRS 6164/Supelec-SCEE Team, Avenue de la Boulaie, BP 81127, 35511 Cesson-Sévigné, France 4 INRIA Rocquencourt, AOSTE, BP 105, 78153 Le Chesnay, France 2 Mitsubishi Received 15 October 2004; Revised 24 May 2005; Accepted 21 June 2005 Future generations of mobile phones, including advanced video and digital communication layers, represent a great challenge in terms of real-time embedded systems. Programmable multicomponent architectures can provide suitable target solutions combining flexibility and computation power. 
The aim of our work is to develop a fast and automatic prototyping methodology dedicated to signal processing application implementation on parallel heterogeneous architectures, two major features required by future systems. This paper presents the whole methodology based on the SynDEx CAD tool that directly generates a distributed implementation onto various platforms from a high-level application description, taking real-time aspects into account. It illustrates the methodology in the context of real-time distributed executives for multilayer applications based on an MPEG-4 video codec and a UMTS telecommunication link. Copyright © 2006 M. Raulet et al. This is an open access article distributed under the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 1. INTRODUCTION New embedded multimedia systems, such as mobile phones, require more and more computation power. They are increasingly complex in design and have a shorter time to market. Computation limits of critical parts of the system (e.g., video processing, telecommunication physical layer) are often overcome thanks to specific circuits [1]. Nevertheless, this solution is not compatible with short design times or the system’s growing need for reprogramming and future capacity improvements. An alternative can be provided by programmable software (DSP: digital signal processor, RISC: reduced instruction set computer, CISC: complex instruction set computer) or programmable hardware (FPGA: field-programmable gate array) components since they are more flexible. Efficiency loss can be counterbalanced by using multicomponent architectures to satisfy hard real-time constraints. 
The parallel aspect of multicomponent architectures (programmable software and/or programmable hardware components interconnected by communication media) and possibly their heterogeneity (different component types) raise new problems in terms of application distribution. Real-time executives developed for single-processor applications can hardly take advantage of multicomponent architectures: handmade data transfers and synchronizations quickly become very complex and result in lost time and potential deadlocks. A suitable design-process solution consists of using a rapid prototyping methodology. The ultimate objective is then to go from a high-level description of the application to its real-time implementation on a target architecture [2] as automatically as possible. The aim is to avoid disruptions in the design process from a validated system at simulation level (monoprocessor) to its implementation on a heterogeneous multicomponent target. The performance of the process can be evaluated according to the following aspects: (i) maximal independence with regard to the architecture, (ii) possibility of handling heterogeneous multicomponent architectures, (iii) maximal automation during the process (distribution/scheduling, code generation, including data transfers and synchronizations), (iv) efficiency of the implementation both in terms of execution time and resource requirements, 2 EURASIP Journal on Applied Signal Processing (v) reduced design time, (vi) enhanced quality and robustness of the final executive. The methodologies generally rely on a description model, which must match the application behavior. These applications are a mixture of transformation and reactive operators [3]. A transformation operator is based on the data-driven process: input data is transformed into output data. A reactive operator is one that is event-driven and has to react continually to stimuli. In practice, systems are a combination of both. 
Nevertheless, an important distinction can be made between systems with deterministic scheduling whose operators are mainly transformation-oriented, and systems with highly dynamic behavior whose operators are mostly reactive-oriented. For the first class of system (including signal, image, and communication applications), DFGs (data flow graphs) have proven to be an efficient representation model. They enable automatic rapid prototyping and lead to optimized scheduling [4]. This paper deals with a rapid prototyping methodology based on the SynDEx tool, which is suitable for transformation-oriented systems and heterogeneous multicomponent architectures. The major contributions concern the following two points: (i) method and tool, more specifically automatic distributed code generation from SynDEx, (ii) a complex multilayer application including video and digital communication layers, going from its high-level description to its distributed and real-time implementations on heterogeneous platforms. SynDEx automatically generates synchronized distributed executives from both application and target architecture description models. These executives specify the inner component scheduling and global application scheduling, and are expressed in an intermediate generic language. These executives have to be transformed to be compliant with the type of component and communication media so that they automatically become compilable code. In this article, we will focus on this mechanism based on the concept of SynDEx kernels, and detail newly developed kernels enabling automatic code generation on various multicomponent platforms. The design and the distributed implementation of a multilayer application composed of a video (MPEG-4) and a digital communication layer (UMTS) illustrate the methodology. 
An MPEG-4 coding application provides the UMTS transmitter with a video coded bitstream, whereas the associated MPEG-4 decoder is connected to the UMTS receiver in order to display the video. The result is a complete demonstration application with automatic code generation over several kinds of processors and communication media. The digital communication layer under investigation is a UMTS FDD (frequency-division duplex) uplink transceiver [5]. UMTS is the European and Japanese selected standard for 3G. It has already spread to many areas of the world, but is not yet predominant. 3G should enable us to benefit from new wireless services requiring quite a high data rate up to 2 Mbps. Typical targeted applications go from wireless internet to video streaming, and also include high-speed picture exchanging and of course voice. MPEG-4 is the latest multimedia compression standard to be adopted by the Moving Picture Experts Group (MPEG) [6]. The prototyping of MPEG-4 video codecs over multicomponent platforms and their optimizations are studied in the IETR Image Group Laboratory. A part of the project has already been presented in [7]. We will therefore focus on the coupling between the UMTS and MPEG-4 subsystems rather than describe the video codec in detail. The paper is organized as follows: Section 2 introduces the SynDEx tool and the AAA methodology. Our contribution in terms of prototyping platforms and executive kernels is described in Section 3. The UMTS description according to the AAA methodology and its implementations are explained in Section 4. The methodology is illustrated and validated by the application (MPEG-4 + UMTS) described in Section 5, which makes it possible to reach a new stage in the relevance of the method. Finally, conclusions and open issues encountered during the application development are given in Section 6. 2. SYNDEX OVERVIEW SynDEx1 is a free academic system-level CAD (computer-aided design) tool developed at INRIA Rocquencourt, France. 
It supports the AAA methodology (adequation algorithm architecture [8, 9]) for distributed real-time processing. 2.1. Adequation algorithm architecture A SynDEx application (Figure 1) comprises an algorithm graph (operations that the application has to execute), which specifies the potential parallelism, and an architecture graph (multicomponent [10] target, i.e., a set of interconnected processors and specific integrated circuits), which specifies the available parallelism. “Adequation” means efficient mapping, and consists of manually or automatically exploring the implementation solutions with optimization heuristics [9]. These heuristics aim to minimize the total execution time of the algorithm running on the multicomponent architecture, taking the execution time of operations and of data transfers between operations into account. These execution times are determined during the characterization process, which associates a list of characteristics, such as execution times, necessary memory, and so forth, with each (operation, processor)/(data transfer, communication medium) pair, respectively. An implementation consists of both performing a distribution (allocating parts of the algorithm on components) and scheduling (giving a total order for the operations distributed onto a component) the algorithm on the architecture. Formal verifications during the adequation avoid deadlocks in the communication scheme thanks to semaphores 1 www.syndex.org M. Raulet et al. 3 User Architecture graph Constraints Algorithm graph SynDEx Adequation distribution/scheduling heuristic Generic synchronized distributed executives Target 1 kernel · Timing graph (predictions) Target N kernel Comm M kernel M4 Dedicated executives for specific targets (specific compilers/loaders) Figure 1: SynDEx utilization global view. inserted automatically during the real-time code generation. 
Moreover, since the Synchronized Distributed EXecutives (SynDEx) are automatically generated and safe, part of the tests and low-level hand-coding are eliminated, decreasing the development lifecycle. SynDEx provides a timing graph, which includes simulation results of the distributed application and thus enables SynDEx to be used as a virtual prototyping tool. In the AAA methodology, an algorithm is specified as an infinitely repeated DFG. Each edge represents a data dependence relation between vertices, which are operations; operation stands for a sequence of instructions, which starts when all its input data is available and which produces output data at the end of the sequence. In SynDEx, there is an additional notion of reference. Each reference corresponds to the definition of an algorithm. The same definition may correspond to several references to this definition. An algorithm definition is a repeated DFG similar to those in AAA, except that vertices are references or ports so that hierarchical definitions of an algorithm are possible. 2.2. Automatic executive generation The aim of SynDEx is to directly achieve an optimized implementation from a description of an algorithm and an architecture. SynDEx automatically generates a generic executive, which is independent of the processor target, into several source files (Figure 1), one for each processor [11]. These generic executives are static and are composed of a list of macrocalls. The M4 macroprocessor transforms this list of macrocalls into compilable code for a specific processor target. It replaces macrocalls by their definition given in the corresponding executive kernel, which is dependent on a processor target and/or a communication medium. In this way, SynDEx can be seen as an off-line static operating system that is suitable for setting data-driven scheduling, such as signal processing applications [12, 13]. 
SynDEx kernels are available for several processors, such as the TI2 TMS320C6x (C62x, C64x) and the Virtex FPGA families, and for several communication media such as SDBs (Sundance digital buses-Sundance high-speed FIFOs), CPs (comports-Sundance FIFOs), BIFOs (BI-FIFOs-Pentek FIFOs), PCI bus, and TCP bus presented in the following section. 2.3. Design process Our previous prototyping process integrated AVS3 (advanced visual systems) as a front-end [14] for functional checking. AVS is a software tool designed for DFG description and simulation. The application was constructed by inserting existing modules or user modules into the AVS workspace, and by linking their inputs and outputs. The validated DFG was next converted into a new DFG by a translator to be compliant with SynDEx algorithm input. The main advantage was the automatic visualization of intermediate and resulting images at the input and output of each module. This characteristic enables the image processing designer to check and validate the functionality of the application with AVS before the implementation step. Although SynDEx is basically a CAD tool for distribution/scheduling and code generation, here we demonstrate that SynDEx can also be directly used as the front-end of the process for functional checking (as can be done with AVS). This is made possible thanks to our kernels presented in Section 3. The design process is now based on a single tool and is therefore simpler and more efficient. SynDEx therefore enables full rapid prototyping from the application description (DFG) to final multiprocessor implementation (Figure 2) in three steps, as follows: 2 3 Texas Instruments. www.avs.com 4 EURASIP Journal on Applied Signal Processing 3. Step 1 Sequential executive (PC) target Functional checking visual C++ application Step 2 SynDEx Sequential executive (PC) with chrono. primitives visual C++ application Sequential executive (DSP) with chrono. 
primitives code composer application Nodes timing estimation Step 3 Distributed executive (PC + DSPs) As described above, the SynDEx generic executive will be translated into a compilable language. The translation of SynDEx macros into the target language is contained in library files (also called kernels). The final executive for a processor is static and composed of one computation sequence and one communication sequence for each medium connected to this processor. Multicomponent platform manufacturers must insert additional digital resources between processors to make communication possible. Thus, SynDEx kernels depend on specific platforms. 3.1. Parallel application Figure 2: SynDEx utilization global view. Step 1. The user creates the application DFG using SynDEx. Automatic code generation provides a standard C code for a single host computer (PC) implementation (SynDEx PC kernel). In this way, the user can design and check each C function associated with each vertex of the DFG, and can check the functionalities of the complete application with any standard compilation tools. With automatic code generation, visualization primitives or binary error rate computation can be used for easy functional checking of algorithms. The user can easily check his or her own DFG on a cluster of PCs interconnected by TCP buses. With this cluster, the user can emulate his or her embedded platform thanks to SynDEx distributed scheduling. Step 2. The developed DFG is then used for automatic prototyping on monoprocessor targets, for which chronometric reports are automatically inserted by the SynDEx code generator. Each duration associated with each function (i.e., vertex) executed on each processor of the architecture graph is automatically estimated using dedicated temporal primitives. Step 3. The user can easily use these durations to characterize the algorithm graph by entering these values in SynDEx. 
Then, SynDEx tool executes an adequation (optimized distribution/scheduling) and generates a real-time distributed and optimized executive according to the target platform. Several platform configurations can be simulated (processor type, their number, and also different media connections). The main advantage of this prototyping process is its simplicity because most of the tasks performed by the user concern the description of an application and a compiling environment. Only a limited knowledge of SynDEx and compilers is required. All complex tasks (adequation, synchronization, data transfers, and chronometric reports) are executed automatically, thus reducing the “time to market.” The user can rapidly explore several design alternatives by modifying the architecture graph or adding constraints. SYNDEX EXECUTIVE KERNELS Development platforms Different hardware providers (Sundance, Pentek) were chosen to validate automatic executive generation. Many component and intercomponent communication links are used in their platforms, ensuring accuracy and the generic aspect of the approach. The use of several hardware architectures guarantees generic kernel developments. Sundance4 platform: A typical Sundance device is made up of a host PC with one or more motherboards, each supporting one or more TIMs (Texas instrument module). A TIM is a basic building block from which you build your system. It contains one processing element, which is not necessarily a DSP, but an Input/Output device, or an FPGA. A TIM also provides mechanisms to transfer data from module to module. These mechanisms, such as SDBs (200 MB/s), CPs (20 MB/s), or a global bus (to access a PCI bus up to 40 MB/s), are implemented on the TIMs using FPGAs. The SMT320 motherboard (Figure 3) is modular, flexible, and scalable. Up to four different modules can be plugged into the SMT320 and connected using CP or SDB cables. 
The SMT361 TIM with a TMS320C6416 (400 MHz) is very suitable for image processing solutions as the TMS320C64xx has special functions for handling graphics. The SMT319 TIM is a framegrabber, which includes a TMS320C6414 and two nonprogrammable devices: a BT829 PAL to YUV encoder, and a BT864 YUV to PAL decoder. These two devices are connected to the TMS320C6414 DSP thanks to two FIFOs, which are equivalent to SDBs with the same data rate. An SMT358 is composed of a programmable Virtex FPGA (XCV600) which integrates specific communication links and specific IP blocks (computation). Pentek5 platform. The Pentek p4292 platform (Figure 4) is made up of four TMS320C6203 DSPs. Each DSP has three communication links: two bidirectional (300 MHz) interDSP links and one for the Input/Output interface. The four DSPs are already connected to each other in a ring structure. Some daughterboards may be added to the p4292 thanks to the VIM (velocity interface mezzanine) bus, such as analog-to-digital converters (ADC p6216), digital-to-analog converters (DAC p6229), or FPGAs (XC2V3000, XC2Vx Virtex2 family). 4 5 http://sundance.com/ http://www.pentek.com/ M. Raulet et al. 5 PC (pentium) Personal computer PCI Embedded motherboard: SMT320 DSP2 (TMS320C6416) SDBa SDBb CP0 CP1 PCI PCI (Bus)PCI SMT361 Bus 6 (CP) FPGA1 (Virtex) SDBa SDBb CP0 CP1 CP2 CP3 Bus 3 (SDB) SMT358 Bus 1 (SDB) DSPC3 (TMS320C6414) SDBa SDBb VID in VID out In (VID in) Out (VID out) PAL to YUV (BT829) VID in YUV to PAL (BT864a) VID out SMT319 Figure 3: Example of Sundance architecture topology. This stand-alone Pentek platform is connected to an Ethernet network. This allows TCP/IP (1.5 MB/s) communications between DSPs and any computer in the network in order to check a binary error rate, or to visualize a decoded image. However, this bus’s throughput does not allow the transfer of uncompressed data. 3.2. 
Software component kernels Most of the kernels are developed in C language so that they can be reused for any C software programmable device. These kernels are similar for the host computer (PC) and the embedded processors (DSPs). The generated executive is composed of a sequential list of function calls (one for each DFG operation). This kind of executive and the fact that the adapted C compiler for DSPs has really improved in terms of resource use mean that the gap between an executive written in C and an executive written in an assembly language is narrow. The user can design each function associated with each vertex of its DFG in C or assembly language for better results [15]. SynDEx creates a macrocode made of several interleaved schedulers: one for computation and the others for communications allowing parallelism of those actions. We have chosen to use multichannel enhanced DMA (direct memory access) transfers, thus maximizing parallelism and timing performance. Data transfers are executed in parallel with computation minimizing communication duration. DMA and CPU have their own bus to access the internal memory, therefore bus conflicts only appear when CPU and DMA access an external memory. As all data buffers are in internal memory, memory bus conflicts are null between CPU and DMA accesses. Communication overhead is only due to DMA setup which is negligible to take transfers into account (a few assembly instructions) [16]. The development of an application on TI processors can be hand-coded with TI RTOS (real-time operating system) called DSP/BIOS [17]. DSP/BIOS is well-suited for multithread monoprocessor applications. Several processors must be connected to improve computational performances and reach real-time performances. In this case, the multithread multiprocessor 3L diamond6 RTOS is more appropriate for this situation than DSP/BIOS. Applications are built as a collection of intercommunicating tasks. 
The mapping and scheduling of each task are chosen manually. Then data transfers and synchronizations are implemented by the RTOS using precompiled libraries. 3L makes multiprocessor application development easier, faster, and suited to dynamic communications between tasks. Data transfers are realized using DMA, but without any computation parallelism, which is nearly equivalent to a polling technique. Data transfers in a signal processing application are generally statically defined both in terms of data type and number so that their description with a DFG is suitable. The execution of DFG operations is also well defined so that 6 http://www.3l.com 6 EURASIP Journal on Applied Signal Processing PC (Pentium) TCP Personal computer ADC daughterboard ADC 1 (DAC) VIM TCP (TCP) VIM 1 (BIFO) DSP-A (TMS320C6203) XX YY IO TCP DSP-C (TMS320C6203) XX YY IO TCP P4292 motherboard DSP-B (TMS320C6203) XX YY IO TCP Bus 4 (BIFO) Bus 2 (BIFO) Bus 1 (BIFO) Bus 3 (BIFO) DSP-D (TMS320C6203) XX YY IO TCP Embedded boards Figure 4: Pentek 4292 motherboard and its daughterboard. data transfers can be implemented with static processes. As static processes are faster than dynamic ones, SynDEx kernels are developed without any RTOS. That is to say that the SynDEx generic executive is not transformed into dynamic RTOS functions, but into specific static optimized functions. 3.3. Communication media kernels With AAA methodology, two different models are possible for communication media between processors: the SAM (single access memory) and RAM (random access memory, shared memory) models. The SAM model corresponds to FIFOs in which data are pushed by the producer if the FIFO is not full, and then pulled by the receiver if the FIFO is not empty. Synchronizations between the two processors are hardware signals (empty and full flags) and are not handled by SynDEx semaphores. The data must be received in the same order as it is sent. Most of our kernels are designed according to this model. 
SDBs, CPs, and BIFO DMAs enable parallelism between calculation and communications, whereas TCP and BIFO do not enable it (data polling mechanism). The RAM model corresponds to an indexed shared memory. A memory space is allocated, and an interprocessor synchronization semaphore is created for each item of data that has to be transferred. This mechanism allows the destination processor to read data in an order different from that in which it was written by the source processor. Interprocessor synchronizations are handled by SynDEx. The first implementation of the RAM model, through the PCI bus, is described in the following section. A PCI transfer kernel, for communications between a DSP on Sundance platforms and the host computer, is first developed with the SAM model. First, the host and DSP must be synchronized. Each data transfer therefore encloses two synchronizations because the PCI bus does not have hardware signals like a usual FIFO (full or empty flag). The receiver must first wait for the sender to write new data in the PCI memory. Then, the receiver can read data from the PCI memory and send an acknowledgement back to the sender. This “rendez-vous as soon as possible” mechanism induces idle or wait states, but is mandatory to ensure the medium is ready for the next transfer and to guarantee transfer order. PCI communications using the SAM model reach a maximum transfer rate of 16 MB/s. This mechanism drastically slows down PCI transfers. In addition, a shared buffer is actually allocated to the PC’s RAM by the PCI bus driver. Therefore, a new PCI kernel implementing the RAM model has been developed, and the transfer rate has been improved (up to 40 MB/s). Each item of data that has to be transferred has its own address allocation in the PCI memory and corresponding semaphores, which allows several buffers to be written before the first one is read. This results in fewer wait states and more time for computation. 
The PCI scheduler is controlled by interrupt when using this model. Consequently, communications and computations can be concurrent on the DSP, thus reducing overall execution time. 3.4. Hardware component kernel Moreover, an FPGA kernel for programmable hardware components has been developed in HDL (hardware description language) and could be considered as a coprocessor in order to speed up a specific function of the algorithm. This kernel handles automatic integration of intercomponent communication syntheses and instantiates a specific IP (intellectual property) block. M. Raulet et al. 7 Code generation Generic SynDEX.m4x Architecture-dependent Processor-type-dependent C62x.m4x C64x.m4x Pentium.m4x FPGA.m4x Application-dependent Application name.m4x Media-type-dependent SDB.m4x (C62x, C64x, FPGA) CP.m4x (C62x, C64x, FPGA) Bus-PCI-SAM.m4x (C62x, C64x, Pentium) Bus-PCI-RAM.m4x (C62x, C64x, Pentium) TCP.m4x (Pentium, C62x) BIFO.m4x (C62x, C64x, FPGA) BIFO-DMA.m4x (C62x, C64x, FPGA) Figure 5: SynDEx kernel organization. Programming of a communication link depends on its type, but also on the processor. Previous works have already validated these libraries [18]; however, they need to evolve with processors or communication links (depending on provider’s additional logic). 3.5. Kernel organization Table 1: Legend of UMTS FDD transmitter. SRC CRC SEG COD EQU INT1 INT2 SPRdata SPRctrl SUM CST-SCR-code SCR DPCCH PSH Source (pseudorandom generator) Addition of cyclic redundancy check bits Segmentation Channel coding Equalization First interleaving Second interleaving Spreading of information bits Spreading of control bits Creation of a complex signal Generation of the scrambling code Scrambling Generation of control bits Pulse shaping The libraries are classified to make developments easier and to limit modifications when necessary. As shown in Figure 5, these files are organized in a hierarchical way. 
An applicationdependent library contains macros for the application, such as the calls of the algorithm’s different functions. A generic library contains macros used regardless of the architecture target (basic macros). The others are architecture-dependent: processor or communication type-dependent. Processordependent libraries contain macros related to the real-time kernel, such as memory allocations, interrupt handling, or the calculation sequence. Communication type-dependent libraries contain macros related to communications: send, receive, and synchronization macros, communication sequences. As different processor types (with different programming of the link) can be connected by the same communication type, one part per processor type can be found in one library. The right part of the file is used during the macroprocessing. Kernels have been developed for every component of the platforms described in Section 3.1. When SynDEx is used for a new application, only the application-dependent library needs to be modified by the user. Architecture-dependent libraries are added or modified when a new architecture is used (a processor or a medium that does not have its kernel). application and signal processing layers are very demanding. This partially explains the delay in the effective arrival of UMTS on the market. It presents a very interesting case study for high efficiency multiprocessing heterogeneous implementations. This becomes even more relevant in a software radio [19] context, which aims to implement as much radio processing as possible in the digital domain, and especially onto processors and reconfigurable hardware. The advantages firstly consist of easing the system design, while privileging fast software instead of heavy low-level hardware development. Secondly, the system supports new services and features thanks to software adaptation capability during system operation [20]. 4. 4.1. 
UMTS APPLICATION UMTS is much more challenging than previous 2G systems, such as GSM. In particular, UMTS signals have a 3.84 MHz bandwidth compared with 270 kHz for GSM. Both General description UMTS FDD physical layer algorithms explained in [5] are implemented for baseband from cyclic redundancy check (CRC) to pulse shaping (PSH) (Table 1) for the transmitter 8 EURASIP Journal on Applied Signal Processing Frame/frame CST SCR code Transport block SRC CRC SEG COD EQU INT1 INT2 SPR data SCR SUM DPCCH SPR ctrl PSH Slot/slot Figure 6: UMTS FDD transmitter (Tx). Frame/frame CST SCR code Transport block BER DCRC Slot/slot DSEG DCOD DEQU DINT1 DINT2 DSPR data DSCR RAKE MFL Figure 7: UMTS FDD receiver (Rx). as shown in the DFG in Figure 6. This does not represent a total real UMTS since synchronization is artificial and no propagation channel is used (the link is completely digital). Data may be generated by an arbitrary source (SRC Figure 6: not in the standard) for bit-error-rate verifications or extracted from a real application, such as a video stream, to make demonstrations. Link characteristics in the measured version are as follows: (i) (ii) (iii) (iv) (v) 1 transport channel, 1 physical channel, no channel coding, spreading factor of 4, data rate of 950 kbps. The receiver [5] extracts the information necessary for the application using the scheme represented in Figure 7 (Table 2). The number of operations effectively in use is much greater than the figures shown, as most of them are duplicated several times. The generation of a 10 ms frame (composed of 15 slots) requires the instantiation of approximately 140 operations for Tx and 240 for Rx in this version, which is a minimum. The granularity of the operations has the same level of complexity as a FFT, FIR, or a memory reorganization. 4.2. FIR implementation The filter operation is of particular interest because its implementation complexity makes it very resource consuming. 
This is a FIR (finite impulse response) with a root-raised cosine impulse response specified by the UMTS standard at both transmitter baseband output and receiver baseband input. Here, the impulse response is symmetric around its center; this characteristic can be exploited to minimize the number of memory accesses, the required memory for storing the filter coefficients and the number of multiplication operations. In order to obtain a convenient rejection of contiguous bands, the filter impulse response is spread over 16 chips and consequently has 33 taps with an oversampling of 2. The same coefficients are used for Tx and Rx. Equation (1) gives us the representation of a FIR filter with an odd number of coefficients, where h is the real coefficient vector of the filter impulse response (filter taps), K is the number of coefficients (or taps), and x[n] and y[n] are the nth input and output complex data samples, respectively. $$y[n] = h\left[\frac{K-1}{2}\right] \cdot x\left[n - \frac{K-1}{2}\right] + \sum_{k=0}^{(K-1)/2-1} h[k] \cdot \bigl( x[n-k] + x[n-K+1+k] \bigr). \quad (1)$$ A real filter (i.e., filter whose coefficients are real) applied to complex data is very frequent in baseband (BB) processing and consists of applying the same filter independently to the real and imaginary parts of the data samples. In our case we are interested in fixed point implementations, so care must be taken to avoid overflow while preserving signal quality (in terms of SNR). The filter at Tx is called pulse shaping (PSH), and at Rx matched filtering (MFL). At Tx, PSH and the oversampling operation (which consists of inserting zeros between binary digits) can be combined in order to minimize computation. In this case we obtain the following: if n is even, $$y[n] = \sum_{k=0}^{(K-1)/4} h[2k] \cdot \left( x[n-k] + x\left[n - \frac{K-1}{2} + k\right] \right), \quad (2)$$ M. Raulet et al. 9 Table 2: Legend of UMTS FDD receiver. MFL h(0, . . . , K) Matched filter Simplified RAKE (one perfectly synchronized finger) RAKE CST-SCR-code DSCR DSPRdata DINT2 DINT1 DEQU DCOD DSEG DCRC BER x(−K + 1, . . . , −1, 0, . . . 
, N − 1) Generation of the scrambling code Descrambling Despreading of information bits Deinterleaving 2 Deinterleaving 1 Equalization inverse operation Channel decoding Transport block extraction Analysis of cyclic redundancy check bits Bit error rate State update Figure 8: Data management for DSP implementation of an FIR. Table 3: Timing of PSH (input: 2560 samples). Target y(0, . . . , N − 1) State New data Time/slot (microseconds) FIR (K taps) C62x 300 MHz 576 C64x 400 MHz 320 XC2Vx 100 MHz 338 Table 4: Timing of MFL (input: 5120 samples). Target Time/slot (microseconds) if n is odd, $$y[n] = h\left[\frac{K-1}{2}\right] \cdot x\left[n - \frac{K-1}{2}\right] + \sum_{k=1}^{(K-1)/4-1} h[2k] \cdot \left( x[n-k] + x\left[n - \frac{K-1}{2} + k\right] \right). \quad (3)$$ The nature of a FIR operation is particularly suited to FPGA implementations, but can also be implemented on DSP processors. A specific characteristic of the DSP is that it has a MAC (multiply accumulate) or a VLIW structure to support filtering computing in one clock cycle. The TMS320C6x family, based on VLIW architecture, has six adders and two multipliers, which operate in parallel and complete execution in one clock cycle. A fixed point multiply accumulate takes two instructions: multiply on one cycle and accumulate on the next. Thanks to pipelining, it is possible to effectively compute two multiply accumulates per cycle. The performance then directly depends on filter length and processor clock frequency as each tap is processed sequentially. In an FPGA, it is possible to parallelize part or all of these operations, depending on the available gate surface. The FIR implemented in the FPGA is a distributed arithmetic (DA) filter [21]. This FIR uses no multipliers, only read-only memory (ROM) and accumulators. The complexity of this filter only depends on the number of bits per sample, not on the number of taps. In the particular case of C6x, it is possible to use a data buffer organization of the FIR as shown in Figure 8. 
FIR is a typical case where functional units in the microprocessor datapath can speed up processing. Data is processed in blocks. The interface consists of an input data buffer, the coefficient buffer, and an output data buffer. The algorithm for each input sample performs the function of y[n] in a for-loop. At the end of each block processing operation, the filter state is updated by copying the last K input data into a state buffer (Figure 8). For the sake of processing efficiency, it is assumed that the input data buffer is stored in a memory after the state data buffer so that negative C62x 300 Mhz 1130 C64x 400 Mhz 640 XC2Vx 100 Mhz 338 Table 5: Tx timings and PSH ratio. Target Sundance Configuration Time/frame PSH ratio 1∗ C64x 1∗ C62x 9.5 ms 50% 11.8 ms 73% Pentek 1∗ XC2Vx 2∗ C62x 1∗ C62x 8.5 ms 9.6 ms 53% 52% indices of the input data buffer point to the state buffer data. In Tables 3 and 4, the differences in timing between C62x and C64x (without taking clock rates into account) are due to the fact that compilers are not the same for each processor, and that those DSPs have different internal architectures. In an FPGA (XC2Vx), this FIR operation could be more parallelized giving better acceleration to the detriment of the gate surface. However, these time values are sufficient to get a Tx or Rx real-time application, that is why we use the same FIR implementation for PSH and MFL. An elementary oversampling function just has to be added before PSH. On the contrary to FPGAs, we take advantage of the FIR features (cf. Section 4.2) on DSPs to optimize and divide by 2 the computation complexity of PSH at Tx, so that 576 microseconds versus 1130 microseconds are obtained on C62x, and 320 microseconds versus 640 microseconds on C64x. 4.3. Tx and Rx implementations Four different implementations (Table 5) of a UMTS transmitter have been automatically tested using SynDEx: three are implemented on Pentek platform and one on Sundance platform. 
A transmitter application must last under 10 ms to be real time. 10 EURASIP Journal on Applied Signal Processing Table 6: Rx timings and MFL ratio. Target Sundance ∗ Pentek ∗ Configuration 1 C64x 1 C62x Time/frame MFL ratio 15.9 ms 60% 20.2 ms 84% 1∗ XC2Vx 1∗ C62x 9.9 ms 32% Principally due to PSH (Table 5, timing PSH ratio compared to a Tx implementation), the first transmitter implementation onto the Pentek platform did not reach real time with one C62x DSP; however, it is possible to parallelize PSH in order to process half of the samples on each of two processors. Before filtering, two buffers of 1296 samples (as described in Figure 8) must be created. Each block processing operation overlaps 16 transient samples. The duration of this PSH is reduced by a factor of 1.5 when transfers are taken into account. Furthermore, code generation and kernels can be used to quickly shift to another platform. UMTS prototyping on the Sundance platform indeed required only a few hours to reach a real-time transmitter application, thanks to our previous works (UMTS algorithm description, SynDEx code generation and kernels) on the Pentek platform. This is a tremendous proof of the portability capabilities offered by the methodology. UMTS Rx has been implemented according to three different configurations (Table 6). A real-time application has been achieved on the Pentek platform with one DSP and one FPGA. MFL parallelization is also possible on several DSPs on the Pentek platform; however, more than two DSPs must then be added, compared with one FPGA in the previous configuration. A configuration with 4 DSPs requires many transfers in the Pentek ring structure, and thus does not reduce the MFL computation time by much. 5. MPEG-4 OVER UMTS: A MULTILAYER SYSTEM MPEG-4 is the latest compression standard. An MPEG-4 codec can be divided into ten main parts (e.g., system, visual, and audio) with different timing requirements and execution behaviors. 
Each part is divided into profiles and levels for the use of the tools defined in the standard. Each profile (at a given level) constitutes a subset of the standard so that MPEG-4 can be seen as a toolbox where system manufacturers and content creators have to select one or more profiles and levels for a given application. The application handled here is an MPEG-4 part 2 codec developed in our laboratory, which is based on the Xvid codec (see footnote 7). This MPEG-4 codec has also been tested on several distributed platform configurations [7] (multi-DSP implementation). Here, our aim is to interface UMTS with MPEG-4 to provide a bitstream to the UMTS application. The methodology makes it possible to merge the design of very different (heterogeneous) parts of the system in terms of hardware processing support (PC, DSP, FPGA) as well as (Footnote 7: www.xvid.org) processing nature. A conventional methodology would require different environments, which is a cause of bugs and incompatibility at the integration step. This causes delays in the best case, and could even call the whole design into question in the worst case. Our approach makes it possible to gather the different parts of the design very early in the design flow and to anticipate integration issues. Nevertheless, MPEG-4 over UMTS raises a new difficulty: the complete application is a multilayer system (two layers, MPEG-4 and UMTS) with different data periodicities between layers. A consequence is that the whole application cannot be represented by a single DFG. The solution consists of breaking up the UMTS physical layer and the video codec layer into four algorithm subgraphs. Then these subgraphs (coder, decoder, modulation, and demodulation) have been implemented on several processors connected to each other by media (FIFO) following the topology of Figure 9. 
The MPEG-4 codec is not embedded here: firstly, TCP throughputs on the Pentek platform do not enable uncoded or uncompressed data to be transferred, and secondly, too few Sundance TIMs are available in our laboratory to embed a complete application with UMTS + MPEG-4. Our real-time MPEG-4 codec provides the maximum data rate supported by our UMTS transceiver (950 kbps). An MPEG-4 bitstream, coded on a PC, is sent via a UMTS telecommunication link to another PC to be decoded. Once the communication transceiver has been implemented on a platform, it can be viewed as a communication medium equivalent to a FIFO. So the platform integrating the MPEG-4 codec could be described as two PCs interconnected by a UMTS communication medium. A FIFO is used to connect asynchronous applications (codec to UMTS communication link). Asynchronous means different periodicities and different data exchange formats. A codec cycle corresponds to one image processing operation producing a variable-size compressed bitstream in a variable time (about 40 ms). A UMTS cycle executes one fixed-size frame in 10 ms. FIFO hardware signals (empty and full flags) ensure the self-regulation of the global system (UMTS + MPEG-4). Two implementations of this global system have been rapidly carried out on two platforms thanks to the developed kernels, as described in Figure 9. The global system runs in real time on the Pentek platform and is not far from real time on the Sundance platform (Rx takes 16 ms per frame, whereas 10 ms is required). The first implementation of the global application on the Pentek platform took quite a long time (two months), mainly to find and solve the multilayer issue, but this implementation was then ported almost immediately to the Sundance platform, which clearly illustrates the efficiency and the pertinence of the approach. 6. CONCLUSIONS AND OPEN ISSUES The design process proposed in this paper covers every step, from simulation to integration in digital signal application development. 
Compared with a manual approach, the use of our fast prototyping process ensures easy reuse, reduced time to market, design security, flexibility, virtual prototyping, efficiency, and portability. M. Raulet et al. 11 MPEG-4 coder UMTS modulation FIFO PC TCP PCI bus FPGAs + DSPs Sundance Pentek UMTS demodulation FIFO SDB BIFO FIFO FPGAs + DSPs TCP Sundance PCI bus Pentek MPEG-4 decoder PC Figure 9: MPEG-4 over UMTS. On the one hand, we have shown how SynDEx is capable of manually or automatically exploring several implementation solutions using optimization heuristics, and on the other hand, how it automatically generates dedicated distributed real-time executives from kernels dependent on the processors and the media. These executives are dedicated to the application because they do not use any support of RTOS, and are generated from the results of the adequation taking operation and data transfer distribution and scheduling into account while providing synchronizations between operations and data transfers and between consecutive repetitions of the DFG. The kernels enable recent multiprocessor platforms to be used and also enable the process to be extended to heterogeneous platforms. It was tested on several different architectures composed of TI TMS320C6201, TMS320C6203, TMS320C6416 DSPs, Xilinx Virtex-E and Virtex-II FPGAs, and PCs. The calculations and data transfers are executed in parallel. RAM and SAM communication models have been tested for PCI transfers. Higher transfer rates are reached using the RAM model enabling real-time video transfers between a PC and a DSP platform. Several complex tasks are performed automatically, such as distribution/scheduling, code generation of data transfers and synchronizations. So the development of a new application is limited to the algorithm description and to the adaptation of kernels for platforms or components. 
Furthermore, as the C language is used and there is a large number of tested topologies, developed DSP kernels can easily be adapted to any other DSP and communication media. Work on the current MPEG-4 + UMTS application is still in progress to achieve wireless communication. A new version already integrates the channel coding (Turbo code) steps, which only slightly increases the overall complexity. This approach ensures fast prototyping of digital signal applications over heterogeneous parallel architectures in many technological fields. Other applications have already taken advantage of it. A SynDEx description of an MC-CDMA (probably planned as 4G) application has been developed by the IETR SPR laboratory [22]. The LAR codec is a video codec studied in the IETR Image Group Laboratory. A similar scheme (Figure 9) has already been tested on different configurations: LAR over MC-CDMA, MPEG-4 over MC-CDMA, and LAR over UMTS. The complex MPEG-4 + UMTS application stresses that a multilayer system presents some specific characteristics in terms of data flow. In the future, this case study may be capitalized on by creating new hierarchical models of architecture graphs in SynDEx, in such a way that the physical layer (telecommunication link) may appear as a particular medium. Another issue is the memory allocation in SynDEx. At each output of each vertex, SynDEx creates an allocation. At this time, memory allocations are reordered and reused manually to give an optimal solution. Current work deals with an automatic solution, based on graph coloring techniques and life memory allocation. REFERENCES [1] A. M. Eltawil, E. Grayver, H. Zou, J. F. Frigon, G. Poberezhskiy, and B. Daneshrad, “Dual antenna UMTS mobile station transceiver ASIC for 2 Mb/s data rate,” in Proceedings of IEEE International Solid-State Circuits Conference (ISSCC ’03), vol. 1, pp. 146–484, San Francisco, Calif, USA, February 2003. [2] K. Keutzer, S. Malik, A. R. Newton, J. M. Rabaey, and A. 
Sangiovanni-Vincentelli, “System-level design: orthogonalization of concerns and platform-based design,” IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems, vol. 19, no. 12, pp. 1523–1543, 2000. [3] T. A. Henzinger, C. M. Kirsch, M. A. A. Sanvido, and W. Pree, “From control models to real-time code using Giotto,” IEEE Control Systems Magazine, vol. 23, no. 1, pp. 50–64, 2003. [4] S. S. Bhattacharyya, P. K. Murthy, and E. A. Lee, Software Synthesis from Dataflow Graphs, Kluwer Academic, Norwell, Mass, USA, 1996. [5] 3GPP TS 25.213 v3.3.0: Spreading and Modulation FDD, release 1999. [6] F. Pereira and T. Ebrahimi, The MPEG-4 Book, Prentice-Hall PTR, Upper Saddle River, NJ, USA, 2002. [7] N. Ventroux, J. F. Nezan, M. Raulet, and O. Déforges, “Rapid prototyping for an optimized MPEG-4 decoder implementation over a parallel heterogenous architecture,” in Proceedings of 28th IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP ’03), vol. 2, pp. 433–436, Hong Kong, China, April 2003, Conference cancelled - Invited paper, ICME 2003. [8] Y. Sorel, “Massively parallel computing systems with real time constraints: the “Algorithm Architecture Adequation” methodology,” in Proceedings of 1st IEEE International Conference on Massively Parallel Computing Systems (MPCS ’94), pp. 44–53, Ischia, Italy, May 1994. [9] T. Grandpierre, C. Lavarenne, and Y. Sorel, “Optimized rapid prototyping for real-time embedded heterogeneous multiprocessors,” in Proceedings of 7th International Workshop on Hardware/Software Codesign (CODES ’99), pp. 74–78, Rome, Italy, May 1999. 12 [10] Y. Sorel, “Real-time embedded image processing applications using the A3 methodology,” in Proceedings of IEEE International Conference on Image Processing (ICIP ’96), vol. 2, pp. 145–148, Lausanne, Switzerland, September 1996. [11] T. Grandpierre and Y. 
Sorel, “From algorithm and architecture specifications to automatic generation of distributed realtime executives: a seamless flow of graphs transformations,” in Proceedings of 1st ACM and IEEE International Conference on Formal Methods and Models for Co-Design (MEMOCODE ’03), pp. 123–132, Mont Saint-Michel, France, June 2003. [12] F. Balarin, L. Lavagno, P. Murthy, and A. SangiovanniVincentelli, “Scheduling for embedded real-time systems,” IEEE Design and Test of Computers, vol. 15, no. 1, pp. 71–82, 1998. [13] L. A. Hall, D. B. Shmoys, and J. Wein, “Scheduling to minimize average completion time: off-line and on-line algorithms,” in Proceedings of 7th Annual ACM-SIAM Symposium on Discrete Algorithms (SODA ’96), pp. 142–151, Atlanta, Ga, USA, January 1996. [14] V. Fresse, O. Déforges, and J. F. Nezan, “AVSynDEx: a rapid prototyping process dedicated to the implementation of digital image processing applications on Multi-DSP and FPGA architectures,” EURASIP Journal on Applied Signal Processing, vol. 2002, no. 9, pp. 990–1002, 2002, Special Issue on implementation of DSP and communication systems. [15] Texas Instruments, “TMS320C6000 Optimizing Compiler User’s Guide,” reference spru187l, March 2004. [16] Y. Le Méner, M. Raulet, J. F. Nezan, A. Kountouris, and C. Moy, “SynDEx executive kernel development for DSP TI C6x applied to real-time and embedded multiprocessors architectures,” in Proceedings of 11th European Signal Processing Conference (EUSIPCO ’02), Toulouse, France, September 2002. [17] Texas Instruments, “TMS320 DSP/BIOS User’s Guide,” reference spru423b, September 2002. [18] F. Nouvel, S. Le Nours, and I. Herman, “AAA methodology and SynDEx tool capabilities for designing on heterogeneous architecture,” in Proceedings of 18th Conference on Design of Circuits and Integrated Systems (DCIS ’03), Ciudad Real, Spain, November 2003. [19] A. Kountouris, C. Moy, and L. 
Rambaud, “Reconfigurability: a key property in software radio systems,” in Proceedings of 1st Karlsruhe Workshop on Software Radios, Karlsruhe, Germany, March 2000. [20] C. Moy, A. Kountouris, and A. Bisiaux, “HW and SW architectures for over-the-air dynamic reconfiguration by software download,” in Proceedings of Software Defined Radio workshop of IEEE Radio and Wireless Conference (RAWCON ’03), Boston, Mass, USA, August 2003. [21] S. A. White, “Applications of distributed arithmetic to digital signal processing: a tutorial review,” IEEE ASSP Magazine, vol. 6, no. 3, pp. 4–19, 1989. [22] S. Le Nours, F. Nouvel, and J. F. Helard, “Example of a CoDesign approach for a MC-CDMA transmission system implementation,” in Journées Francophones sur l’Adéquation Algorithme Architecture (JFAAA ’02), Monastir, Tunisie, December 2002. EURASIP Journal on Applied Signal Processing M. Raulet received his postgraduate certificate in signal, telecommunications, images, and radar sciences from Rennes University in 2002, and his Engineering degree in electronic and computer engineering from the National Institute of Applied Sciences (INSA), Rennes Scientific and Technical University in 2002, where he is currently working as a Ph.D. student in collaboration with Mitsubishi Electric. His interests include image compression and telecommunication algorithms and rapid prototyping. F. Urban received his postgraduate certificate in signal, telecommunications, images, and radar sciences from Rennes University in 2004, and his Engineering degree in electronic and computer engineering from INSA, Rennes Scientific and Technical University in 2004. He is currently working as a Ph.D. student in collaboration with Thomson R&D, France. His interests include image compression, rapid prototyping, DSP optimization, and codesign. J.-F. Nezan is an Assistant Professor at the National Institute of Applied Sciences (INSA) of Rennes and a Member of the IETR Laboratory in Rennes. 
He received his postgraduate certificate in signal, telecommunications, images, and radar sciences from Rennes University in 1999, and his Engineering degree in electronic and computer engineering from INSA, Rennes Scientific and Technical University in 1999. He received his Ph.D. degree in electronics in 2002 from the INSA. His main research interests include image compression algorithms and multi-DSP rapid prototyping. C. Moy received his Engineering degree from the National Institute of Applied Sciences (INSA), Rennes Scientific and Technical University, France, in 1995. He received his M.S. and Ph.D. degrees in electronics in 1995 and 1999 from the INSA. He worked from 1995 to 1999 on spread-spectrum and RAKE receivers for the Institute of Electronics and Telecommunications of Rennes (IETR). He then worked for 6 years at Mitsubishi Electric ITE-TCL Research Laboratory, where he focused on software radio systems and concepts. He is now an Assistant Professor at Supélec. His research is done in the SCEE Laboratory of the UMR CNRS 6164 IETR, which focuses on cognitive radio. He addresses heterogeneous design techniques for SDR as well as cross-layer optimization topics. M. Raulet et al. O. Deforges is a Professor at the National Institute of Applied Sciences of Rennes (INSA). He graduated in electronic engineering from the École Polytechnique, University of Nantes, France, in 1992, where he also received a Ph.D. degree in image processing in 1995. In 1996, he joined the Department of Electronic Engineering at the INSA, Rennes Scientific and Technical University. He is a Member of the UMR CNRS 6164 IETR Laboratory in Rennes. His principal research interests are image and video lossy and lossless compressions, image understanding, fast prototyping, and parallel architectures. Y. 
Sorel is a Research Director at INRIA (National Institute for Reseach in Computer Science and Control) and a Scientific Leader of the Rocquencourt’s Team AOSTE (Analysis and Optimization for Systems with Real-Time and Embedding Constraints). His main research topics are modeling of distributed real-time systems with graphs and partial order, uniprocessor and multiprocesseur real-time scheduling optimizations of systems with multiple constraints, and automatic code generation for hardware/software codesign. He is also the Founder of SynDEx, a system-level CAD software distributed free of charge at www.syndex.org. 13 +0 @; # " 0 5 $ 0 (1#/#"("# / +< +" 1 ! ( 3# #) 3 +0 +0 '- 504 $ "#0 - - ,( " & & , > 6 K 92 .$ ,$ # #$ 03 & &% : 7! 2 1$ $ ! 1 " '- " " '- " $!% < 6 9 6 '9 > 9 69 6 9 ( 6 A9 ( ,9 > ,$ '0# " , ' 9 c d6 9 * 6 ,$ '0# H cBd , 9 K 9 ( & + 9 6 * 9 > 9 < G < > > ' > > >6 >M >( 6 < > ! ,< $ 9 ( K + % / " # c ( d, >M > 9 9> 9 A 69 > % . ) H< G >M 3 >M 6 2 ) @ 6 (K 2 9 $ 5 ' > % + K ) < < 9 6 A 9 >M > WiFi Randomiser WiMAX Convolutional Coder Interleaver UMTS Constellation Mapper LUT Flip Flop < Scrambler Encoder Real Spreader Butterfly Adder NAND ) .! RS FFT-N NOT -M > & & % % 9 Over Sampler Multiplier XOR # Spreader AND ! .& 9 OR $!% < & & 6 + \ ,9 2 2 " \< , '< 0 '#0. \ @@ '- 504 $ "#0 - \ 92 < > / : ) .! ' < 6M > 6 < 9 9 9 & 0$ % 1$ $ ! " " 92 , & % & ;& " < > E c (=d )) 9 / ! < & \ 2 9 01 $ ! V' $ K ( 9 96 >< ' + 2 < '- " R 6 & + '- 9 < 9 1 " c ( d ! <. & 9 J .$ ,$ # #$ 03 2 6 9 ( " ' ( & I > 2 > 9 < 9 6 < 69 9 9 9 + 6 < > 69 ' & @: 9 6 , !-0 Z 4 , 96 2 " , '< 0 '#0. 2 2 6 2 '- $ "#0 - 504 9 9 $ /0 Q G / / 0/15 " # ! ,< / 6 _< 1 "1 9," E$ 1 K + 9 + +" ,0 #< !/%/ ) ,9 _ / " 3 5 + 2 K 9 + K 6 5 - "5 ,$ 03 , 9 9 "$ \ , , '> 9 "< ' A 96 ! & 6 A X P #> ]$ O ! A # ! , 2 6 ,$ '0#< 6 , B $ ( 4 ' , !-0 Z 4 , 0 0 # '- " c d' > ,9 '- A 3 " * 1 " 2 9 >6 9 ' " # ! ,< * 6 ,$ '0# E$ 301 3# c (;d 9 2 1$ $ ! H 6 3# 9 .$ ,$ # #$ 03 + + E C 01 $ ! V > ^ " , '< 0 '#0. 
@B '- @ $ "#0 - 504 - " , '< 0 '#0. .$ ,$ # #$ 03 1$ $ ! 1 " '- '- " WIRELESS COMMUNICATIONS AND MOBILE COMPUTING Wirel. Commun. Mob. Comput. 2006; 6:1–14 Published online in Wiley InterScience (www.interscience.wiley.com) DOI: 10.1002/wcm.487 2 3 Install or invoke?: The optimal trade-off between performance and cost in the design of multi-standard reconfigurable radios 4 6 Virgilio Rodriguez†,‡ , Christophe Moy and Jacques Palicot∗ SCEE Research Team, IETR, Supélec, Cesson-Sevigné, France 7 Summary 5 8 9 10 11 12 13 14 15 16 17 We approach the design of multi-standard reconfigurable radio as a choice between two extremes: (a) installing complex, self-contained dedicated components that provide high performance at a high cost (‘Velcro approach’), versus (b) supporting all targeted standards through primitive lower level components, which reduces cost but maximises latency. To find the optimal trade-off between performance and cost, we build a mathematical model, in which we represent the design choices through a graph of progressively simpler functional modules. We illustrate our approach through a simple but realistic design example. Additionally, we show that this optimisation can be reformulated as a version of the well-known network-design problem, which opens the door to a wealth of results, algorithms and experience, already available in the scientific literature. We identify some specific algorithms that can help us find the optimal design and discuss limitations and future directions of our research program. Copyright © 2006 John Wiley & Sons, Ltd. 18 19 20 KEY WORDS: reconfigurability; software-defined radio; design; graph theory; network design problem; optimisation; multi-standard; SDR; parametrisation 21 21 1. 22 We view the design of a multi-standard reconfigurable radio as choosing the optimal point between two extremes. 
At one extreme is the ‘Velcro’ solution: to support several communication standards through self-contained complex components, each exclusively dedicated to a given standard. At the other extreme, we can attempt to support all desired standards through very simple, primitive operators (adders, multipliers, MAC, etc.) by invoking them repeatedly, in order to perform the various communication tasks 23 24 25 26 27 28 29 30 31 Introduction necessitated by the supported standards. The Velcro approach will generally provide the best performance, but at the highest monetary cost. In a Velcro design, much functionality is wastefully replicated. Conversely, by going to the other extreme, we can minimise the monetary cost of the radio, but at a performance level that may be unacceptable for many practical applications. Thus, we need to find the right level of complexity at which to use selfcontained dedicated components. This is the level that gives the optimal trade-off between performance and cost. *Correspondence to: Jacques Palicot, Supélec, Avenue de la Boulaie, BP 81127; 3551 Cesson-Sevigné; France. † E-mail: [email protected] ‡ V. Rodriguez is now with the Institut für Nachrichtentechnik, at the Universität Karlsruhe, in Germany. Copyright © 2006 John Wiley & Sons, Ltd. 32 33 34 35 36 37 38 39 40 41 42 43 2 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 V. RODRIGUEZ, C. MOY AND J. PALICOT To find the optimal performance/cost trade-off, we model the equipment as a hypergraph of progressively simpler functional modules. The functionality of one such module can be provided either by installing a self-contained component, or by the invoking (possibly many times) simpler (lower level) modules. 
Thus, for us, ‘module’ refers to a functionality, while a self-contained component is an optimised hardware/software combination, dedicated to provide a functionality in the most efficient manner. One such component could be an equaliser, for example or an implementation of the fast Fourier transform (FFT). But rather than installing such component, we could achieve the given functionality by invoking lower level modules, instead. For example some forms of equalisation can be achieved through the FFT [1]; and the functionality of the FFT operator can be achieved via a butterfly module, which itself could be implemented via CORDIC [2] (and each of the butterfly and CORDIC modules could be implemented through an installed component, or by invoking yet simpler modules, and so on). Simpler, lower level components are generally less expensive to build and can be reused by several upper level modules inside and across standards. Thus, the use of lower level components reduces the monetary cost of the equipment. Unfortunately, such use generally increases the execution time of the concerned task. As a consequence, the total execution time of a given operation may exceed practical limitations. Thus, to achieve the optimal trade-off between performance and cost, we need to answer repetitively the question: should we install or invoke?, that is should we install now, at this level of complexity, a self-contained component, or invoke yet lower level modules to achieve the desired functionality? At the foundation of our study is the ‘common operators’ approach to the design of reconfigurable equipment. Its main principle is the identification and (re)use throughout the design of common components that can each match several processing contexts, via a simple parameter adjustment. This approach can greatly increase the efficiency of a multi-standard software-defined radio, both in terms of its cost, and of the speed of reconfiguration during operation. 
The common operators approach is discussed more extensively in Reference [3]. The present publication builds upon some recent work of ours. In Reference [4], we introduce our formal approach to the design of multi-standard radios. To explore the cost/performance trade-off in Reference [4], we combine economic and latency considerations into a single cost function. The Copyright © 2006 John Wiley & Sons, Ltd. combined cost function (a weighted sum) is a reasonable first step, as it simplifies the exposition and the solution procedure. But we acknowledge that this combination has some drawbacks: it adds the monetary cost (paid once by the designer) to the ‘delay costs’ incurred by the user during each use, throughout the entire useful life of the radio; it fails to account for the hard time constraints that often arise in communication applications; and it ultimately makes the chosen design highly dependent on the weights (which are themselves arbitrary, even if judiciously chosen with attention to real-life considerations). To address these concerns, in Reference [5], we minimise the (monetary) cost only, while considering latency through appropriate execution time constraints that cannot be exceeded. Neither of References [4,5] provides the ideal algorithm to achieve the solution. Both References [4,5] utilise exhaustive search (which may be impractical for large design problems), and Reference [4] briefly explores simulated annealing (a general algorithm not specifically oriented to our kind of problem [6]). In Reference [7], we recast our problem as a ‘network-design problem’. The network-design problem is well known and has a very extensive literature, which offers algorithms, computational complexity results and many practical applications in a variety of contexts such as telecommunication, transportation, water resources and even social interactions [8]. 
Indeed, we have already identified specific algorithms which seem particularly well suited to our purposes, and which we are examining [9–11]. In the present work, we consolidate, clarify and where appropriate correct and extend the work reported in References [4,5,7]. Neither our previous nor the present work addresses the identification of new common operators, useful to the design of multi-standard reconfigurable radios. Such identification is, in its own right, an active area of research. Researchers are proceeding along several directions. For instance, Reference [1] shows that many important tasks of a communication receiver can be implemented through the FFT. In turn, the FFT can be implemented via the butterfly operator, and, as argued more recently [2], via CORDIC. With this in mind, some researchers are seeking frequency-domain implementations for different families of algorithms. For example Reference [12] studies the frequencydomain implementation of Reed–Solomon channel decoding. Other relevant works describing interesting approaches to the design of reconfigurable radios include: References [13,14], which argue for parametrised Wirel. Commun. Mob. Comput. 2006; 6:1–14 DOI: 10.1002/wcm 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 THE OPTIMAL DESIGN OF MULTI-STANDARD RECONFIGURABLE RADIOS 174 design from a ‘common functionality’ perspective, as originally advocated in Reference, [15]; Reference [16] whose approach is inspired by object-oriented programming; Reference [17] which attempts to cover hardware and software design under a common methodology; and Reference [18] which proposes designs that integrate the entire system on a chip (SoC). The rest of the paper proceeds as follows. First, we build the mathematical model. 
This step includes the drawing of a graph that represents design alternatives, the consideration of possible performance metrics and the specification of an objective function and appropriate constraints. Subsequently, we discuss the main approaches we have taken to consider cost and latency in the same optimisation problem. We then undertake the optimisation procedure under two different cost functions (i) weighted sum of monetary and ‘delay’ costs, as in Reference [4], and (ii) monetary cost only, with ‘delay’ as a constraint, as in Reference [5]. Subsequently, we discuss and interpret our results. Then, we show that our architecture optimisation enquiry can be reformulated as a ‘network-design problem’, and comment on specific algorithms available in the literature. We conclude the paper, with a discussion addressing further interpretations, as well as limitations and future directions of our research program. 175 2. 176 2.1. Graph-Theoretic Representation 177 As illustrated by Figure 1, we represent a multistandard radio, as a hypergraph of progressively 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 178 Mathematical Model Fig. 1. Graph corresponding to a conceivable tri-standard reconfigurable radio. Copyright © 2006 John Wiley & Sons, Ltd. 3 simpler functional modules. Each node (module) represents a functionality that can either be implemented via a dedicated hardware/software component (e.g. an ASIC, or an intellectual property core), or can be achieved by invoking lower level modules. The hyperarcs leaving a node (parent) specify the simpler modules (descendants) that could provide the required functionality through multiple calls. Descendant nodes may not all be at the same level. An OR arc (direct arrow) means that only one of the descendant nodes is necessary to implement the functionality of the parent node. For example, S1 can be implemented through any one of A1, A2 or B1. 
An AND arc (inverted Y connection as pointing from S3 to A4 and A5) means that all descendant nodes are needed to implement the functionality of the parent node. In some cases, a parent node may have both AND and OR dependencies with its descendants. For example there are three alternatives for providing the functionality of S2: (1) installing a self-contained complex component dedicated to S2, (2) making available both A2 and A3, (3) making available A4. The roots of this graph, at the highest level, represent the standards to be supported by the radio. The fact that the graph is acyclic helps the process of finding the solution to our design problem. 179 2.2. 205 Optimisation Parameters The decision to provide a functionality via a selfcontained, dedicated component or by invoking multiple-times simpler modules is determined by two key considerations: (monetary) cost and (execution) time. The monetary cost of a component (which is paid only once during the useful life of the radio) represents the total cost of including the component in the design. In some architectures for reconfigurable radios, the monetary cost can be represented by (is proportional to) the number of logic units necessitated by an FPGA implementation. It is best to take the view point of a system integrator that ‘outsources’ the components (software or hardware). Then, the monetary cost represents the fair market value, at design time, of acquiring the finished component in the open market. Execution time (incurred every time a component is employed throughout the life of the system) is also a critical consideration, because the user has limited patience, and in fact, communication standards impose hard time constraints (deadlines) for the completion of certain operations. Wirel. Commun. Mob. Comput. 
2006; 6:1–14 DOI: 10.1002/wcm 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 4 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 V. RODRIGUEZ, C. MOY AND J. PALICOT 2.3. Considering Both Cost and Latency: Two Approaches There are at least two basic approaches to considering cost and latency in the design of a reconfigurable equipment: (i) the ‘combined’ cost function and (ii) the deadline approaches. As we have done in Reference [4], we can combine economic and latency considerations into a single cost function (a weighted sum of both ‘costs’). The combined cost function has at least three serious problems: (i) it adds the monetary cost (paid once by the designer) with the ‘delay costs’ incurred by the user, each time it executes a standard throughout the useful life of the equipment, (ii) it fails to account for the hard time constraints, often arising from communication applications, and (iii) it makes the design highly dependent on the weights, which are themselves arbitrary. Nevertheless, the combined cost function is a reasonable first step, as it simplifies the exposition and the solution algorithm. It may also be helpful in pre-design work, and could allow the designer to gear a design to the ‘luxury’ (high performance) or ‘value’ market sectors. As an alternative, as in Reference [5], we can minimise the economic cost only and take latency into account through execution time constraints (‘deadlines’) that cannot be exceeded. However, determining the ‘deadlines’ to be observed while designing the radio to support a given standard is nontrivial and should be done with great care. 
If deadlines are set too high, they may lead to a design that fails to perform at acceptable levels in practical situations. But if deadlines are set lower than necessary, the resulting design will be more expensive than necessary. The key to determining the right delay tolerances is to examine the ‘transmission chain’, applicable to each supported standard (the block diagram depicting the various steps necessary to establish end-to-end communication under a considered standard, as shown by Figure 2), and the numerical specifications provided by the standardisation bodies. A chosen architecture must yield a performance able to support end-to-end communication under each of the supported standards. We do not further address here the determination of these deadlines and assume that the pertinent information has been obtained elsewhere and given to us. 274 2.4. 275 Conceptually, the ‘deadline approach’ includes as a special case, the ‘combined cost’ approach. This is 276 Formalising the Optimisation Problem Copyright © 2006 John Wiley & Sons, Ltd. Fig. 2. The key to determining appropriate ‘deadlines’ is an examination of the ‘transmission chain’. This corresponds to GSM, but our development does not specifically target that standard. so, because when we use the combined cost, we can pretend that this cost is the original monetary cost, and that we ignore the deadlines because they are ‘very large’. Thus, we simply choose the least expensive design. Presumably, the designs that are ‘too slow’ are also (under the combined cost function) very expensive and we would not choose them for that reason. Thus, below we formalise the deadline approach only, and think that when we use the combined cost function, all deadlines are set to infinity. Let: Si with i = 1, . . . , I be the standards to be supported, δi the ‘design deadline’ of standard Si . Fk with k = 1, . . . 
, K be 0-1 variables such that, Fk = 1 if component k is chosen to be installed in self-contained (dedicated) form. Ck and Tk be respectively the (monetary) cost and the execution time of component k (if self-contained). τi (F1 , F2 , . . . , FK ) be the time of executing the tasks used to calculate the ‘deadline’ of standard i for a particular choice of components. Notice that not all the possible designs are acceptable. For example if there are three root nodes (standards), and the self-contained components that can each execute an entire standard (for the ‘Velcro’ design) are labelled 1, 2 and 3, then, the point (1, 1, 1, 0, 0, . . . , 0, 0), which means setting F1 = F2 = F3 = 1 and all others Fj = 0, is in principle acceptable (but not necessarily optimal, of course!. . . this is the ‘Velcro’ architecture). But some other choices would not support the desired standards (e.g. Wirel. Commun. Mob. Comput. 2006; 6:1–14 DOI: 10.1002/wcm 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 THE OPTIMAL DESIGN OF MULTI-STANDARD RECONFIGURABLE RADIOS 308 309 310 311 312 313 314 315 (0, 0, . . . , 0), which means building nothing at all, is clearly unacceptable). Let $\mathcal{F}$ denote the set of all K-tuples of binary numbers that correspond to acceptable choices of the set of components (this is information taken directly from the hypergraph). Then we seek to solve:
$$\min_{(F_1, F_2, \ldots, F_K) \in \mathcal{F}} \; \sum_{k=1}^{K} F_k C_k \quad \text{subject to} \quad \begin{cases} \tau_1(F_1, F_2, \ldots, F_K) \le \delta_1 \\ \qquad\vdots \\ \tau_I(F_1, F_2, \ldots, F_K) \le \delta_I \end{cases} \tag{1}$$
316 317 3. 318 In principle, the preceding problem can be solved directly by computing the cost of every feasible design (choice of components that supports the desired standards and obeys the deadlines, if applicable), and choosing the design of least cost. For larger problems, a ‘smarter’ algorithm is needed. Below, we provide an illustration which can be solved with minimal computing effort. 
319 320 321 322 323 324 325 Solving the Optimisation 326 3.1. A Realistic Illustration 327 Figure 3 (representing an evolution of a figure from Reference [4]) shows a sub-graph, corresponding to the decomposition of several processing elements (equalisation, multi-channel, OFDM) that could be part of a multi-standard radio system. The sub-graph is not intended to show all the possible alternative implementations for each of the considered processing elements. Root nodes (standards) are not shown. The equalisation block compensates for the multipath impairment typical of wireless channels. It can be either implemented through a finite-impulse response filter (FIR) or through the FFT operator. The implementation of equalisation in the frequency domain is particularly attractive for channels that exhibit long-impulse responses, which leads to FIR filters with a very high number of taps [19]. Notice that the inverse FFT is computationally equivalent to the FFT; thus, we attach a multiplicative factor of 2 to the arc pointing from the equaliser to the FFT. 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 Copyright © 2006 John Wiley & Sons, Ltd. 5 Multi-channel refers to the channelisation function of a cellular base station. This can be accomplished via the ‘classical’ channel per channel procedure, or by proceeding in parallel, through a filter-bank channeliser (which can be implemented via FFT). Other lower level modules correspond to well-known signal processing constructs. The graph shows that at least the FFT operator is needed to implement OFDM modulation. The graph also shows that both equalisation and OFDM could employ the same FFT operator, although it is optional for FIR-based equalisation. A reconfigurable FFT component could provide the functionality of FFT operators of different orders. The numerical values, near the bottom right of a block represent cost/time associated with the component that could provide that functionality. 
The units of measurement are immaterial for our purposes. We only need relative figures to be able to compare one design alternative to another. But the time figures must be consistent with those in which the deadlines are expressed. At the top left, there is a numerical identifier for the corresponding module. The arcs are tagged with a number of calls (NoC) figure. When a node is needed several times by a higher level module, it is called several times, and not physically replicated. Accordingly, the multiple calls affect the latency of the system, but not its monetary cost. Below, we assume that the channeliser handles 25 channels, that the FFT is of order 64, and that to implement this FFT through a butterfly, 192 calls are needed. 346 3.2. 379 Results and Interpretations (1) Deadline approach: The sub-graph of Figure 3 is small enough to be solved by exhaustive search, although the number of design alternatives quickly explodes. The thick broken blue line in Figure 3 shows an FFT-only design, which is a conceivable outcome of the optimisation, for a specific set of deadlines. In Figure 4, we integrate the information in Table I and Figure 3 into the widely available spreadsheet model, which may be sufficient for smaller designs. Figure 4 illustrates the solution procedure and shows some interesting design alternatives. For this illustration, we have ignored the designs that use the simplest (lowest level) components (adder, multiplier, etc). Thus, the simplest considered component is the CORDIC. Wirel. Commun. Mob. Comput. 2006; 6:1–14 DOI: 10.1002/wcm 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 6 V. RODRIGUEZ, C. MOY AND J. PALICOT Fig. 3. Partial hypergraph with some design parameters and a plausible solution indicated through the thick broken (blue) line. A block represents a functionality (module). 
Near the bottom right of a block are cost/time values, associated with the component that could provide the concerned functionality if installed (the number at the top left is just an identifier). 396 397 398 399 400 401 402 403 404 405 406 407 A 1 in the column corresponding to a module means that, in that particular design, the module is implemented via a dedicated component. T 1, T 2 and T 3 are the execution times corresponding to OFDM, equalisation and the channeliser for a given design. The deadlines should be applied to the entire transmission chain (not to individual tasks). Nevertheless, for the sub-design illustration under consideration, we pretend that the tasks at the top of the graph have associated deadlines (determined by the supported communication standards). We have considered that an order-64 FFT operator satisfies Copyright © 2006 John Wiley & Sons, Ltd. our requirements and that the multi-channeliser handles 25 channels. The ‘Cost’ column shows that the least expensive design consists of a CORDIC component only, but it performs slowly. This would be the chosen design if the deadlines are, for example 6200, 12 300 and 320 000, respectively (recall that no time unit has been specified so far). If the deadlines are tighter, the CORDIC-only design falls outside the feasibility region. Then other candidates may be chosen. For example the butterfly-only design performs a good deal better Wirel. Commun. Mob. Comput. 2006; 6:1–14 DOI: 10.1002/wcm 408 409 410 411 412 413 414 415 416 417 418 419 THE OPTIMAL DESIGN OF MULTI-STANDARD RECONFIGURABLE RADIOS 7 Fig. 4. The figure illustrates how to integrate the design data and procedure into the widely available spreadsheet model, which may be sufficient for smaller designs (the data displayed vertically may be easier to read in Table I). Only certain interesting design alternatives are shown. A row represents a design. 
A 1 in the column corresponding to a module means that this module is implemented via a self-contained component. T 1, T 2 and T 3 are the execution times corresponding to, respectively, OFDM, equalisation and the multi-channel channeliser for a given design. The ‘Cost’ column shows that the least expensive design consists of a CORDIC component exclusively, but performs poorly. The ‘Velcro’ solution (bottom) performs fastest but costs most, as it implements the top modules with self-contained components. 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 in all three tasks, and is only marginally more expensive. The FFT-only design costs about 70 times more than the butterfly-only alternative, but performs seven to eight times better, and could very well be the optimal choice under tighter time constraints. The ‘Velcro’ solution performs best and costs most, as it implements the top modules with dedicated components. The FFTonly design is far behind the Velcro approach, only with respect to T 3 (multi-channeliser). This performance gap narrows considerably if one complements the FFT component with a filter-bank component. The Filter-bank/FFT combination provides performance near Velcro’s, for about one quarter of the cost. (2) Combined cost function: We can use the data of Figure 4 to build Table II and illustrate the Copyright © 2006 John Wiley & Sons, Ltd. combined cost function approach, which we first applied in Reference [4]. In Table II, the columns labelled CC(w) contain for the corresponding design the ‘combined cost’, which is a weighted sum of the form w × cost + T 1 + T 2 + T 3, where w is the weight. A weight of zero simply means that ‘money is no object’, and we want the best performing design at any monetary cost. In that case, as column CC(0) shows, the ‘Velcro’ design is optimal. But as soon as we consider cost at all, even with a fairly small weight of only 1 (column CC(1)), the Velcro approach becomes non-optimal. 
With a weight of 13 the FFT-only design is the winner, which is consistent with the results presented in Reference [4]. And it takes a much larger weight (1000) for the butterfly-only design to become optimal. With a huge weight on Wirel. Commun. Mob. Comput. 2006; 6:1–14 DOI: 10.1002/wcm 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 8 V. RODRIGUEZ, C. MOY AND J. PALICOT Table I. The table shows for each module in Figure 3, its numeric identifier, verbal description, as well as its monetary cost and execution time, if implemented through a self-contained component. Money and time units are not specified. 454 455 456 457

| ID | Description | Money | Time |
|----|-------------|-------|------|
| 73 | OFDM | 1200 | 300 |
| 72 | Equaliser | 1500 | 200 |
| 71 | Multi-channeliser | 10 000 | 10 |
| 62 | Filter bank | 2000 | 100 |
| 61 | Channel-per-channel | 1500 | 200 |
| 51 | Multi-rate filter | 700 | 700 |
| 42 | FIR | 500 | 1000 |
| 41 | Down sampler | 1 | 1 |
| 31 | FFT | 1000 | 500 |
| 23 | Butterfly | 15 | 20 |
| 22 | Down converter | 30 | 10 |
| 21 | CIC filter | 300 | 50 |
| 13 | MAC | 12 | 8 |
| 12 | CORDIC | 11 | 8 |
| 11 | CIC cell | 10 | 2 |
| 4 | Look-up table (LUT) | 1 | 1 |
| 3 | Adder | 4 | 2 |
| 2 | Multiplier | 10 | 5 |
| 1 | Delay | 1 | 1 |

monetary cost (50 000), which essentially means we seek the least expensive design without concern for performance, the CORDIC-only design is chosen. 459 4. Toward an Efficient Algorithm: The Network Theory Reformulation 460 4.1. 461 Above, we have described a formulation that allows us to find for a multi-standard reconfigurable radio, an architecture which is optimal in view of both cost (money) and performance (computational execution time). We can find the exact optimum through exhaustive search (‘brute force’). In Reference [4], we also explored finding an approximate solution through simulated annealing. The ‘brute force’ approach is 458 462 463 464 465 466 467 468 Motivation computationally impractical when there is a very large number of design alternatives. The second approach utilises a ‘generic’ algorithm not specifically oriented to the problem at hand, and may or may not yield a solution near the ‘true’ optimum. 
A third alternative is to seek a ‘smart’ algorithm which fully utilises the special structure of the graph to efficiently search the solution space. In the present section, we move in that direction, by recasting our problem as a single-source (or single terminal (sink)) ‘network-design problem’. The network-design problem is well known and has a very extensive literature, which offers algorithms, computational complexity results and many practical applications in a variety of contexts such as telecommunication, transportation, water resources and even social interactions [8]. Below, we show how to perform the problem reformulation and give a specific example grounded in a practical design. In support of our approach, we identify and discuss several specific algorithms which seem particularly well suited to our purposes [9–11]. 469 4.2. 491 ‘Network-Design’ Problem Figure 5(a) provides a simple illustration of the single-source network-design problem. Suppose there is an initial point called the origin, o, and three ‘terminals’ (destinations), t1 , t2 and t3 . We wish to connect o to each ti in a way that satisfies certain optimality criteria. There may also be some intermediate nodes, which themselves may be (partially) interconnected. There are many conceivable solutions. The most obvious one is simply to build three direct ‘roads’, one from o to t1 , a different one from o to t2 and a third one from o to t3 . This solution is certainly plausible and probably provides the shortest ‘travel times’, but it is unlikely to be ideal. By building dedicated roads, we are possibly missing the savings resulting from having ‘common segments’ which may be used by all traffic, Table II. For each design, the combined cost (weighted sum) is obtained as CC(w) = w × cost + T 1 + T 2 + T 3 where w is the weight. 

| Design | Cost | T1 | T2 | T3 | CC(0) | CC(1) | CC(13) | CC(1000) | CC(50 000) |
|--------|------|----|----|----|-------|-------|--------|----------|------------|
| CORDIC | 11 | 6144 | 12 288 | 313 344 | 331 776 | 331 787 | 331 919 | 342 776 | 881 776 |
| CORDIC+DSamp | 12 | 6144 | 12 288 | 307 925 | 326 357 | 326 369 | 326 513 | 338 357 | 926 357 |
| Butterfly | 15 | 3840 | 7680 | 195 840 | 207 360 | 207 375 | 207 555 | 222 360 | 957 360 |
| FIR+CORDIC | 511 | 6144 | 1000 | 31 144 | 38 288 | 38 799 | 44 931 | 549 288 | 25 588 288 |
| FIR+butterfly | 515 | 3840 | 1000 | 28 840 | 33 680 | 34 195 | 40 375 | 548 680 | 25 783 680 |
| FFT | 1000 | 500 | 1000 | 25 500 | 27 000 | 28 000 | 40 000 | 1 027 000 | 50 027 000 |
| FilterBk+FFT | 3000 | 500 | 1000 | 100 | 1600 | 4600 | 40 600 | 3 001 600 | 150 001 600 |
| ‘Velcro’ | 12 700 | 300 | 200 | 10 | 510 | 13 210 | 165 610 | 12 700 510 | 635 000 510 |

Copyright © 2006 John Wiley & Sons, Ltd. Wirel. Commun. Mob. Comput. 2006; 6:1–14 DOI: 10.1002/wcm 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 THE OPTIMAL DESIGN OF MULTI-STANDARD RECONFIGURABLE RADIOS 9 (a) (b) Fig. 5. The network-design problem and the design of multi-standard reconfigurable radios. (a) A simple network-design problem; (b) The ‘network-design’ version of a section of the design graph. Solid arrows indicate ‘free’ pre-built ‘algorithmic roads’ that take time to travel, but require no monetary expenditure. 507 508 509 510 511 512 513 514 regardless of the destination. For example we could build a road from o to some intermediate point, A, plus dedicated segments from A to each ti . Because the segment o → A is used by all traffic, certain savings may result (although this may lengthen the travel time to all traffic). By the same reasoning, building dedicated segments from A to each ti may not be optimal. Perhaps we can Copyright © 2006 John Wiley & Sons, Ltd. build a dedicated road A → t3 , but also a segment from A to some intermediate point B, followed by segments B → t1 and B → t2 . 
In the latter proposal, all traffic would travel over the o → A segment, the t3 traffic would go directly over A → t3 while both the traffic to t1 and to t2 would continue over A → B, and from B over B → t1 and B → t2 , respectively. There are many other possibilities. Wirel. Commun. Mob. Comput. 2006; 6:1–14 DOI: 10.1002/wcm 515 516 517 518 519 520 521 522 10 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 V. RODRIGUEZ, C. MOY AND J. PALICOT 4.3. From a Hypergraph to a Network-Design Problem Our problem can be recast as a network-design problem by proceeding as follows. The communication standards correspond to terminals we wish to reach. Building a direct road from the origin to each terminal corresponds to the ‘Velcro’ solution: choosing a selfcontained dedicated complex component to support each standard. This will provide the ‘fastest routes’ to be sure, but probably not the ideal solution. Installing a self-contained component of ‘medium’ complexity is the equivalent of building a road from the origin to the intermediate point A in the example above. For instance, Reference [1] shows that many important tasks of a communication receiver can be implemented through the FFT. In turn, the FFT functionality can be implemented with a butterfly operator. Thus, we can ‘build a road’ from the origin to the FFT (i.e. install a self-contained FFT component) to be shared by several tasks. This will likely reduce the overall cost of the design, although it will generally ‘increase the travel time’. But we could also ‘reach’ the FFT node (i.e. provide the functionality of the FFT operator), by first ‘building a road’ from the origin to the intermediate junction ‘butterfly’ and from there utilise an ‘existing road’ to the FFT (i.e. 
install a self-contained butterfly component, and invoke it repeatedly to provide the FFT functionality). Further explanation of the reformulation is given below through examples. 4.4. Illustration of the Graph-to-Network Conversion Figure 5(b) shows the network-design problem, corresponding to a section of the realistic hypergraph of design choices given by Figure 3. The parts of the graph that concern lowest level components and the channeliser have been ignored for pedagogical reasons. Recall that now the objective is to build a ‘road network’ that provides a path from the origin to each ‘terminal’. The solid arrows mark ‘free’ (pre-built) roads that take time to travel, but require no monetary expenditure. These represent known algorithms which can be used to provide a higher level functionality. For example from the FFT node there is a pre-built (free) ‘road’ to OFDM, another to equalisation and yet another to the FIR node, because there are known algorithms that allow the implementation of equalisation, OFDM and FIR through the FFT operator [1]. The dashed arrows indicate ‘possible roads’ that definitely cost money to build (but possibly negligible time to travel). ‘Building Copyright © 2006 John Wiley & Sons, Ltd. a road’ means installing a self-contained component to provide the functionality pointed by the arrow. For example there is a dashed arrow from the origin to FFT (meaning we can provide the FFT functionality by installing a self-contained FFT component). There is also a ‘possible road’ from the origin to butterfly. If we build this ‘road’ (i.e. if we install a self-contained butterfly component) we can ‘reach’ the FFT node via a free (algorithmic) path (shown by a solid arrow from butterfly to FFT), because one can provide the FFT functionality by repeatedly invoking a butterfly operator. Figure 6 shows in greater detail the ‘networkdesign problem’, corresponding to Figure 3. 
For clarity, neither the fact that the digital down-conversion module can be implemented through CORDIC nor the lowest level modules are considered. Also, some obvious dashed arrows from the origin (e.g. from the origin to equaliser, meaning installing an equaliser component) are omitted to reduce clutter. The main purpose of Figure 6 is to show that representing the AND dependencies is challenging in some cases, and may require some ingenuity and additional research. The FIR ‘triple node’ illustrates some of the main issues. A ‘triple’ FIR node is used in Figure 6 to show that there are three different ‘routes’ to get the FIR functionality, and the route has an impact on what can be done from there. The ‘sub-node’ FIR3 concerns the case where the FFT is used to implement the FIR. The ‘sub-node’ FIR2 refers to the case when an FIR component is installed, even though the FFT functionality is present (this may be done to reduce execution time). FIR1 denotes a case in which an FIR component is installed and the FFT functionality is not available (perhaps OFDM and equaliser dedicated components have been installed, and the FFT module is unnecessary). In the cases FIR2 and FIR3 , the functionality of the filter-bank module can be achieved algorithmically, because both the FFT and the FIR are available. However, under FIR1 , there is no FFT module, thus, the filter-bank functionality would require an installed component. For this reason, there is no arrow pointing to the filter-bank module from the FIR1 sub-node. On the other hand, the solid arrow pointing to the equaliser from the FIR is drawn from the oval, around the various FIR sub-nodes. This indicates that we can achieve equalisation through the FIR, irrespective of how we achieve the FIR functionality (contrary to the filter-bank case). Also shown in Figure 6 is a simpler case of AND dependency, which involves achieving multi-rate filtering (MRateFilt) through both FIR and down sampling (DSamp). 
The dashed line from the FIR oval Wirel. Commun. Mob. Comput. 2006; 6:1–14 DOI: 10.1002/wcm 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 THE OPTIMAL DESIGN OF MULTI-STANDARD RECONFIGURABLE RADIOS 11 Fig. 6. Network view of the partial hypergraph given in Figure 3. Some low level connections not shown. 624 625 626 627 628 to DSamp, followed by a solid arrow from DSamp to MRateFilt, shows that if we have the FIR functionality (no matter how) and we install (‘build a road to’) a down sampler, then we can achieve multi-rate filtering ‘for free’ (algorithmically). 629 4.5. 630 The reformulation of Section 4 provides us with several well-studied algorithms. Of these algorithms, we shall now identify and discuss some that seem particularly promising. Reference [9] considers a network-design problem with two metrics: cost and ‘distance’. This fits well with our formulation, with execution time as ‘distance’. It follows the first approach discussed in Section 2.3, that is, it combines both cost and ‘distance’ into a single objective. But this algorithm is probabilistic. Reference [10] modifies it to make it deterministic, utilising linear programming. The basic idea of Reference [9] is as follows. The algorithm has several stages. At each stage, it forms a matching (‘pairing’) of terminal nodes (not previously eliminated). Then, of each pair of nodes, it chooses a ‘centre’. At this point, the non-centre nodes are removed from further consideration and a new stage of the algorithm starts (with a new matching among the ‘centres’ only). Because for us, the terminals correspond to standards to be supported by the radio, they should be relatively few. Thus, this algorithm will give an answer in a relatively small number of stages. 
631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 Possible Algorithms Copyright © 2006 John Wiley & Sons, Ltd. For reasons given in Section 2.3, it may be preferable to utilise an algorithm which minimises costs, while keeping ‘length’ (time/latency) under a specified budget. Reference [11] provides an algorithm that does exactly that. However, this algorithm appears more complex than the preceding ones. Currently, we are evaluating these algorithms, while continuing our exploration of the rich network-design literature. 653 5. 662 Discussion and Outlook We have built a mathematical model that enables us to identify an architecture for a multi-standard reconfigurable radio, which is optimal, in view of both performance and cost. The chosen architecture represents an optimal point between two extremes: (i) highly complex communication components, each exclusively dedicated to a given standard (‘Velcro’ approach), and (ii) very simple components to be invoked by ‘higher layers’, in support of the various standards. We represented the radio as a hypergraph of progressively simpler functional modules. We have illustrated our approach through a simplified ‘sub-design’. We do not claim that it corresponds to a system available today, but it is sufficiently close to reality to provide useful insights. Our results agree with intuition. When the execution time constraints, imposed by the supported standards Wirel. Commun. Mob. Comput. 2006; 6:1–14 DOI: 10.1002/wcm 654 655 656 657 658 659 660 661 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 12 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 V. RODRIGUEZ, C. MOY AND J. 
PALICOT on the considered high-level modules are very tight (or when the weight given to monetary cost in a combined cost function is relatively low), the more complex dedicated components are chosen. On the other hand, if the time constraints are lenient (or if the weight given to money in a combined cost function is relatively high), money is saved by supporting the standards through simpler (lower level) components, such as the FFT, butterfly or even the CORDIC operator. The cost and time figures we have used are, in our judgement, reasonable and consistent. But we acknowledge that obtaining better estimates of these values for realistic designs is an important future task. Our analysis also sheds some light on some of the economic issues that may arise when a single architecture targets several communication standards. An optimal architecture that exclusively supports standards oriented to ‘slow’ applications, such as voice and text messaging, will favour lower cost, simpler, reusable components. Thus, a consumer who is primarily interested in such ‘traditional’ applications will be better off by purchasing equipment that supports only a few simple communication standards. In other words, certain care must be taken in choosing the set of standards to be supported by a single multi-standard reconfigurable radio. The algorithm chosen to find the optimal architecture requires additional attention. In solving our ‘subdesign’ illustration, we adopted a ‘manual’ (spreadsheet) approach, while in previous work, we explored briefly simulated annealing [6]. Both of these choices have problems already discussed. In search of a better algorithm, we have reformulated our design problem as a ‘network-design problem’, which opens the door to an extensive literature, offering many algorithms and results. Indeed, we have already identified and discussed some specific network-design algorithms which seem particularly promising. 
While we have shown simple examples of the transformation of the hypergraph of design choices into a network-design problem, we have not formally proved that every conceivable such hypergraph can be exactly represented as a network-design problem. But even if this representation is not possible in specific cases, by ignoring certain non-essential connections, one may obtain a network-design problem that is sufficiently close to reality for its solution to provide a good approximation to the optimal design (which perhaps can then be revised manually). As an alternative, one could study the inner workings of an appropriate network-design algorithm and try to adapt it to the problem at hand. Copyright © 2006 John Wiley & Sons, Ltd. Several important relevant issues have been neglected so far, or require additional attention. For example, we have assumed that for every functional module, there is exactly one component which can provide the desired functionality at a given performance level, and for a given cost. In practice, for a given functionality (say the FFT operation), the market may provide many alternatives, each offering a different price/performance trade-off. Our formulation can be modified to consider various components providing the same functionality. Building the hypergraph of design choices is itself an object of research. We have already pointed out that researchers are trying to identify new operators that may be common to several communication blocks within a given standard, or across several standards. The identification of any such operator will lead to a modification of the graph. Likewise, some researchers seek frequency-domain algorithms that provide functionality that is typically provided by time-domain procedures. The discovery of any such algorithm will imply a modification of the graph with new arcs, pointing to the FFT operator. Also, as communication standards evolve, the graph may need to be modified. 
Other important issues requiring (additional) attention include consideration of (i) the time needed to re-configure the radio, while switching from a standard to another, (ii) the ‘travel time’ of signals from a component to another inside the radio, and (iii) the possible contention among high-level modules for the service of the same lower level module. This contention may be particularly important if the radio needs to support simultaneous communication over several standards at the same time, such as, for example, downloading a file through a wireless local area network, while simultaneously video conferencing through UMTS. Our framework is easier to visualise if one thinks of a ‘component’ as a physical entity (such as a chip) that may or may not be installed. However, our approach is more general than that. For instance, it can accommodate a central processor-based architecture. In a design based on a digital signal processor (DSP), the components could be ‘objects’ (in the object-oriented programming sense, as advocated in Reference [16]). The cost of one object would be its ‘fair market value’ if ‘outsourced’. The basic trade-off between performance and cost would remain. Presumably, primitive objects should be very inexpensive to outsource or develop. But supporting all communication tasks by invoking very simple objects Wirel. Commun. Mob. Comput. 2006; 6:1–14 DOI: 10.1002/wcm 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 THE OPTIMAL DESIGN OF MULTI-STANDARD RECONFIGURABLE RADIOS 806 should likely lead to unacceptably slow designs. At the other extreme, a highly optimised object that is dedicated to support a standard should provide the best performance, but require the highest monetary expenditure, whether this object is outsourced or developed ‘in house’. 
On the other hand, the tradeoff between the processing power and the cost of the DSP should also be considered in the optimisation, which we have not directly addressed in the present report. Besides cost and computing performance, other factors such as the DSP’s energy consumption and heat production may have to be considered as well. Ultimately, we are well aware that we have raised more questions than we have answered. Nevertheless, finding for a multi-standard reconfigurable radio, an architecture that is optimal in view of both performance and cost is a very worthy cause. We believe that this report establishes the general validity and usefulness of our approach. Indeed, its greatest contribution might be to attract the attention of other researchers whose work may address some of the important questions we have left unanswered. 807 Acknowledgement 808 809 Financial support by the ‘Région Bretagne’ of France is gratefully acknowledged. 810 References 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 1. Palicot J, Roland C. FFT: a basic function for a reconfigurable receiver. In Telecommunications. 10th International Conference on, vol. 1, Papeete, Tahiti, pp. 898–902, 2003. 2. Heyne B, Goetze J. A pure CORDIC based FFT for reconfigurable digital signal processing. In 12th European Signal Processing Conference, Vienna, Austria, September 2004. 3. Palicot J, Roland C, Louet Y, Al Ghouwayel A. A new parameterization technique for reconfigurable radio: the common operator approach, 2006. Technical Report TR/SCEE/01.V1/2006. Supélec (Campus de Rennes): CessonSevigné, France. 4. Moy C, Palicot J, Rodriguez V, Giri D. Optimal determination of common operators for multi-standards software-defined radio. In 4th Karlsruhe Workshop on Software Radios, Karlsruhe, Germany, March 2006. 5. Rodriguez V, Moy C, Palicot J. 
An optimal architecture for a multi-standard reconfigurable radio: Cost-minimising common operators under latency constraints. In 15th IST Mobile and Wireless Communications Summit, Myconos, Greece, June 2006. 6. Kirkpatrick S, Gelatt C, Jr., Vecchi M. Optimization by simulated annealing. Science 1983; 220(4598): 671–680. 7. Rodriguez V, Moy C, Palicot J. An optimal architecture for a multi-standard reconfigurable radio: a network theory re-formulation. In Personal, Indoor and Mobile Radio Communications. 16th IEEE International Symposium on, Helsinki, Finland, September 2006. Copyright © 2006 John Wiley & Sons, Ltd. 13 8. Magnanti T, Wong R. Network design and transportation planning: models and algorithms. Transportation Science, 1984; 18(1): 1–56. 9. Meyerson A, Munagala K, Plotkin S. Cost-distance: Two metric network design. In Foundations of Computer Science. 41st Annual Symposium on, Redondo Beach, CA, U.S.A., November, pp. 624–630, 2000. 10. Chekuri C, Khanna S, Naor J. A deterministic algorithm for the cost-distance problem. In 12th annual ACM-SIAM symposium on discrete algorithms, Washington, DC, USA, January, pp. 232– 233, 2001. 11. Marathe MV, Ravi R, Sundaram R, Ravi SS, Rosenkrantz DJ, Hunt HB. Bicriteria network design problems. Journal of Algorithms 1998; 28: 142–171. 12. Al Ghouwayel A, Louët Y, Palicot J. A reconfigurable butterfly architecture for Fourier and Fermat transforms,’ In 4th Karlsruhe Workshop on Software Radios, Karlsruhe, Germany, March 2006. 13. Wang H, Cai T, Song J. Analysis of common structure and software download for software defined radio. In International Conference on Communication Technology, Beijing, China, August, vol. 2, pp. 1112–1116, 2000. 14. Jondral F. Parametrization—A technique for SDR Implementation. In Software Defined Radio—Enabling Technologies, Tuttlebee W (ed.). John Wiley & Sons: London, 2002; 233–256. 15. Rhiemeier A-R. Benefits and limits of parameterized channel coding for software radio. 
In 2nd Karlsruhe Workshop on Software Radios, Karlsruhe, Germany, March 2002. 16. Jha US. Object oriented HW functions accelerate communication, computing, and multimedia convergence. In Personal, Indoor and Mobile Radio Communications. 16th IEEE International Symposium on, Berlin, Germany, September 2005. 17. Tuttlebee W. Software-defined radio: Facets of a developing technology. IEEE Personal Communications 1999; 6(2): 38–44. 18. Belzile J, Bernier S, Uhm M. Software radio modems enter the SoC era. COTS Journal 2004; 6. http://www.cotsjournalonline. com/home/article.php?id=100191. 19. Bidet E, Cardin J, Kouam M, Joanblanq C, Palicot J. FDF, a 512TAP FIR filter using a mixed temporal-frequential approach. In IEEE Custom Integrated Circuits Conference, Santa Clara, CA, USA, pp. 173–176, 1995. 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 Authors’ Biographies 879 Virgilio Rodriguez is a Research Associate with the Institut für Nachrichtentechnik, at the Universität Karlsruhe, in Germany. He is involved with ultra wide band (UWB) technology, as part of the European project PULSERS. From the Fall of 2005 through the summer of 2006, he was at Supélec (Campus de Rennes), in France, where he applied mathematical formalisms to achieve designs of multistandard reconfigurable radios that are optimal in view of both cost and performance. Before this, he spent a year as a Research Fellow at the University of Surrey, England, where he applied market principles to the dynamic management of the radio spectrum, as part of the End-to-End Reconfigurability (E2 R) European project. He received his PhD degree in May 2004, from the Polytechnic University (Brooklyn, New York), where he focused on analytical studies for radio resource management in the context of thirdgeneration cellular communication networks. 
Previously, he received Master’s degrees from both Purdue University and 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 Wirel. Commun. Mob. Comput. 2006; 6:1–14 DOI: 10.1002/wcm 14 V. RODRIGUEZ, C. MOY AND J. PALICOT 901 902 the Virginia Polytechnic Institute, for work, respectively, in electrical and systems engineering. 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 Christophe Moy is an engineer of the INSA (National Institute of Applied Sciences), Rennes, France, 1995. He received his M.Sc. and Ph.D. degrees in Electronics in 1995 and 1999 from the INSA. He worked from 1995 to 1999 on spread spectrum and RAKE receivers for the IETR. He then worked 6 years at Mitsubishi Electric ITE-TCL research lab where he was focusing on Software Radio systems and concepts, including digital signal processing, HW and SW architecture, co-design methodology and reconfiguration. He also addressed the design of SDR impulse UWB systems. He represented Mitsubishi Electric at the SDR Forum and worked on the French research program A3S, as well as the IST project E2 R phase 1. He is now an Associate Professor at Supélec, campus of Rennes, France. His research is done in the SCEE laboratory, specialized in software and cognitive radio, which is part of the IETR. He addresses heterogeneous architectures design and reconfiguration management for Copyright © 2006 John Wiley & Sons, Ltd. SDR and cognitive radio. He participates to the IST Network 923 of Excellence Newcom, the IST project E2 R phase 2, as well 924 as the French research program IDROMel. 925 Jacques Palicot received the Ph.D. degree in signal processing from the University of Rennes, France, in 1983. Since 1988, he has been involved in studies about equalization techniques, applied to digital transmissions and new analog TV systems. 
Since 1991, he has been involved mainly in studies concerning the digital communications area and automatic measurement techniques. He has taken an active part in various international bodies (EBU, CCIR), and in European projects. He has published various scientific articles, notably on equalization techniques, echo cancellation, neural techniques and hierarchical modulations. He is currently involved in adaptive signal processing and in new techniques as software radio and Cognitive Radio techniques. Since October 2003, he has been with Supelec, Campus of Rennes, where he is a Professor and head of the Signal, Communication & Embedded Electronics research team. Wirel. Commun. Mob. Comput. 2006; 6:1–14 DOI: 10.1002/wcm 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 '- 504 $ "#0 - - ,* .$ ,$ # #$ 03 1$ $ ! 9 < 9 + + 6 2 *9 & ,9 > ) + > 2 6 & " 1 , - 4 2 6 A 9 $ " + c (:d 3 A c (:d < 6 ( < 2 >6 > >6 5 > -1 5 , c?d K 9 & , >& 2 ( >& , '< 2 / 6 >& 6 K 2 < 6 A9 & 9 @ K B 6 + 6 2 > T0 3#0 > c (:d >6 " 9 ( A 2 > / ! 6 2 9 > 2 & 6 < & * '- " >( 6 c (@d 5 69 ( 6 A9 ,9 > 1 % >& '- " +.! & , 9 1 " , 6 0 /< ( 6 9 2 A 6 > 9 < K 6 Mode 1 Mode 2 Mode n ) .! G M $!% < & & $ & !. & & % ." % %3 1 &"! % 7! ! %7! ( " +.! & & %& 5 & & &!0 : /$ % " &N 3 1 .&!" 5 < < 6 2 < + + + > 6 < >6 < < c (Bd > " 9 92 A " $!% < K & , '< 0 '#0. B= '- $ "#0 - , / 504 9 - 6 9 > 26 =< ,=g 5 6 + K % > 9 , > > ) < 2 .$ ,$ # #$ 03 , ,=g 5 O6 "$ '% & '- " ' < 69 % & 2 2 < $ 9 '- 6 2 >6 )< / ! > 2 K > 1 " < % < 1$ $ ! 1" / ! 9 K 6 ( T 9 , 2 < ) = )< 9 6 6 = 1 2 9 < 9 > Global manager Standard parameter control Dispatches orders to lower layers Config. Manager L1_ReM Standard Set Config. Management Level 2 L2_ReMUs : – – – Function Level Independent of the HW Manages several elementary PBprocessing blocks of lower granul. Processing blocks configuration Embedded very closely to the PB Dedicated to the nature of reconfigurable resources ) .! 
HM Function Set Function Set L3_ReMU L3_ReMU Block Set Block Set " +.! & 1* > 2 . , 6 %, g 5) 9 . ' < 2 6 K 9 9 , < ,=g 5 >6 D > < K > / ! > > , B; ' &!0 9 . 9 …L3_ReMU Block Set Config. Management Level 3 & 92 L2_ReMU …Function Set L2_ReMU L2_ReMU L2_ReMU Function Set L3_ReMUs : – – – $ Config. Management Level 1 L1_ReM : – – – 9 96 6 9 6 O , g 5 < , g 5 (O D 6 & 9 0 2 9 K " , '< 0 '#0. ' 1" 9 & '- 504 $ "#0 - - < A 9 > ! = / ! -1 5 K 6 2 ' + < / ! & J 1 " '- 69 & ) 9 9 K 6 , 1$ $ ! 96 % 6 < 1" .$ ,$ # #$ 03 19Q 92 < K 9 & > & < J 1" < 6 > / ! 6 '- " K % ) 2 / ! A B 1 , - 4 , / >& ? 2 9 A D 6 E 2+ < :B i 9 2 9 6 6 E + > , ( / ! 1 92 * 9 6 > , =@ X , + 2 2R E 2+; 9 L + ' * GUI Host PC FPGA SystemRec_REQ PC (host) FPGA DSP .out .bit .bit DSP new µBlaze SystemRec_ACK " ( ) data generator wire display FIR Reconf. manager MMI FIR reconfig. controller mapping GUI Host PC mapping reconfig. controller GUI Host PC PartialRec_REQ FIR ) µBlaze FPGA REQ(partial) ql .bit_partial .bit_partial DSP PartialRec_ACK PartialRec_REQ MAP µBlaze FPGA REQ(param) ql DSP REQ(param) ql PartialRec_ACK PartialRec_ACK IP !" # " $% & !" # " ) .! $% " IM $% #$ & " & ! 3 /T- $ * %&! 1 ,0 5 6 > Reconfiguration duration 832 kB 16,6 ms FIR reconfiguration 35 kB 670 µs $% ' 9 Bitstream size FPGA total reconfiguration ( + #' (44G 9 2 c ( d c (?d c ( d + 9 > 9 > 6 " 96 > W " , '< 0 '#0. 9 K , 6 ' B@ '- 504 $ "#0 - > 81 ' 6 2 $ 9 ( 5 > > 6 \ > > ; .$ ,$ # #$ 03 > % < < ( > J 72 92 & 6 6 2 > < '- " 6 ,9 9 ' '- 9 6 & 6 > > 2 1 " > 9 ; 6 1$ $ ! 2 >& " N) ' @ * + > ')2 % + & < c (Bd , - < A9 > 9 1 , - 4 " ' ) _5 1( _( D 2 9 69 @ / ! ' 2 / ! B / ! ] 1( " ' 3 45 C ^ '2, . 2 1 , - 4< 3 C X 2 B: + '> ) > " $ H 2 % " 3 /TG / ! 6 ' %# #") , '< 0 '#0. 
>& * * + > 1 ,0 5 2 , MANAGING DYNAMIC PARTIAL RECONFIGURATION ON HETEROGENEOUS SDR PLATFORMS Jean-Philippe DELAHAYE, Christophe MOY, Pierre LERAY, Jacques PALICOT IETR/Supelec-SCEE Laboratory, Campus of Rennes, France {jean-philippe.delahaye, christophe.moy, pierre.leray, jacques.palicot}@supelec.fr ABSTRACT This paper deals with partial reconfiguration issues on heterogeneous prototyping platforms (DSP/ FPGA). An analysis of multi-standard physical layer applications in terms of reconfiguration needs permits to extract several use cases of reconfiguration in a multi-standard handset: standard switching, mode switching, bug fixing, etc. The main difference between these schemes of reconfiguration is the level of granularity of the reconfiguration. We argue that a configuration manager needs to handle this multigranularity of reconfiguration to optimize the change of context (either reconfigurable hardware or programmable software processing components) in terms of size of code, time to reconfigure, etc. The analysis also helps to determine the accurate level of flexibility needed through the reconfigurable architecture at different scales. Within this framework partial reconfiguration is an essential feature for optimizing the reconfiguration inside FPGA. We aim indeed at providing reconfiguration adequacy between reconfigurability capabilities of hardware resources and reconfiguration needs of Software Defined Radio applications. Architectural solutions are proposed to implement partial reconfiguration on existing hardware combining DSP and FPGA. 1. INTRODUCTION SDR is expected to be the most appropriate answer to future multi-standards handsets design challenges [1]. We can predict that SDR systems will be heterogeneous in terms of computing resources, in order to deal with a wide variety of radio applications. This implies many research activities in the fields of multi-processing and heterogeneous computing. 
All the more so as dynamic reconfiguration is involved. But even if solutions exist for DSP [2], where it is more natural to support reconfiguration, it is less common in the field of FPGA. We propose here a combined approach, from the application side to the implementation on the platform, that is particularly well suited to FPGA. The idea is based on the following major point: the reconfiguration requests implied by SDR applications will have multiple levels of granularity. Taking into account these granularity levels will be of particular importance to manage efficient and speedy reconfiguration at all levels of the system. The functional architecture proposed here takes into account application needs depending on the granularity of the reconfiguration. This functional architecture makes it possible to extract as much as possible of the flexibility offered by any reconfigurable heterogeneous platform. This is straightforward for processor-based systems and particularly suits FPGA despite FPGA's limitations in terms of dynamic reconfiguration [3]. Whereas the full flexibility of our functional architecture is not reached in an implementation using current FPGA (limitation of the column-based configuration memory structure [4] of Xilinx FPGA), it makes it possible to obtain the best of their reconfiguration ability. The paper is organized as follows. The next part is concerned with an algorithmic analysis of a multi-standard transmitter including baseband processing functions of GSM, UMTS UTRA/TDD, and 802.11g OFDM mode. Configuration management needs are deduced. Since flexibility is strongly related to granularity, part 3 proposes a hierarchical model of configuration management which makes it possible to handle the multi-granularity of reconfiguration. Part 4 presents the resulting functional hierarchical architecture deduced from the previous point. A concrete implementation is detailed in part 5, with emphasis on the FPGA dynamic reconfiguration aspects. 2. 
APPLICATION ANALYSIS AND CONFIGURATION NEEDS FOR CONFIGURATION MANAGEMENT Basically, Software Radio aims at providing multi-standard communication access using a hardware platform based on shared processing resources. The Software Radio hardware needs to be reconfigurable to be able to switch from one processing mode to another. It also has to be heterogeneous to face the wide variety of processing categories. This implies that the number of configuration contexts to manage grows as the number of standards increases, multiplying the number of different processing algorithms. The number of configurations also depends on the type of reconfigurable resources of the hardware platform. First, it is necessary to do a functional baseband analysis to classify each baseband function in order to reduce overall configuration management complexity. Having in mind the optimization of the configuration management complexity, the classification of the baseband functions in terms of parameters, needs of hardware resources and needs of flexibility aims at reducing the number of configuration contexts. The second step of the application analysis is about the classification of the context switching cases, which determines the granularity of a reconfiguration. 2.1. Multi-standards baseband functions analysis The multi-standard functional analysis starts with a first study of the baseband functions of the uplink transmitter of three standards (GSM 900, UMTS UTRA/FDD, WLAN 802.11g mode). These standards have been chosen for their wide variety of baseband signal processing. GSM is a classical mono-carrier TDMA system, UMTS uses CDMA techniques, and finally 802.11g offers a multi-carrier mode (OFDM). Such a multi-standard transmitter involves all of the conceptual challenges of defining a configuration management architecture. We group the multi-standard baseband functions into three functional classes presented below. 
Gathering functions into classes strongly favors the opportunity to use common operators [5] that can address several processing roles by a simple change of parameters. Fig 1: 3 Standard Baseband Functions Classification and the computing demand resources The Coding Class includes functions like cyclic coding, convolutional and turbo coding. Standards use a wide range of coding schemata. Despite the extensive set of parameters needed to handle the coding functions, these are generally based on a few operators such as 1-bit linear shift registers and modulo-2 adders. A high flexibility of the processing resources is mandatory to reuse the common operators efficiently. The Data Handling Class puts together the functions which manipulate data packets. Due to the nature of the functions (concatenation, segmentation, multiplexing, etc.), the data packet lengths are very different. These functions are exclusively dedicated to data transfers. This class of algorithms is largely control-oriented. It mainly requires a lot of memory resources and flexibility to process different kinds of data. The Modulation Class corresponds to the baseband functions which take place before transposition onto the frequency carrier. In this class, the functions are computation-oriented and data are often oversampled. High throughput is needed by the filtering functions. The general goal in each class of processing is to group similar processing to maximize the reuse of hardware. Grouping functions into classes helps when dealing with configuration management. Despite grouping the functions into three classes, parameters from one function to another are quite different. This is the reason why we associate with each function a specific Configuration Management Unit (CMU). Our configuration management architecture is explained in more depth in the next section. 2.2. The reconfiguration needs of multi-standards applications. 
The general goal of our studies is to reduce the configuration management complexity as well as the configuration overload by minimizing the resources to reconfigure during any context switching. Many design considerations are involved to optimize the reconfiguration such as the architecture, the design methodologies or the parameterization studies, with a common goal: to enhance the reuse of the processing blocks [6]. In this part, we discuss the multi-standard application switching needs and afterwards we lean on this analysis to propose our configuration management architecture. The different types of application switches that we define are the following: Standard Switching: The standard switch is the most demanding one. The transmitting chains of two standards are hugely different. Almost all of the baseband processing has to be reconfigured. Moreover, a standard switch not only involves the physical layer but also the higher layers of the protocol stack. Mode Switching: A mode switch is considered to be an intra-standard context switch. Some standards define several functional modes. In most cases, the mode switch does not include changes of the higher-level layers and the mode parameters are often defined by the MAC Layer. For instance, 802.11g is defined with several types of mode: DSSS, FHSS, OFDM. So in the case of a mode switch, reconfigurations are lighter than in a standard switch and it is not worth performing the same huge reconfiguration. Service Switching: A service switch on the physical layer remains an intra-standard switch, unlike a service switch at the application level, which could imply a standard switch. A service switch could be a request to increase the data rate. For instance in 802.11g mode OFDM, a request to change the data rate from 6Mbit/s to 54Mbit/s implies modifying at least the mapping function from BPSK to 64QAM. 
So the service switch can be performed with parameterization and reconfiguration of some of the baseband functions. A partial reconfiguration of the transmitting chain is then generally sufficient. Performance enhancement, bug fixing: The ability to reconfigure small parts of a processing chain is necessary to allow performance enhancement or bug fixing when the system is already in use. The possibility to change any processing pattern in the chain is closely related to the design methodologies which have been used during the definition of the processing chain architecture and to the ability of the hardware resources to change such small parts of a given function. This type of reconfiguration has to be performed without any discontinuity of service for the user. The different kinds of context switching detailed above should all be supported in an SDR system. The given configuration management architecture has to be able to manage the different granularity levels of configuration to optimize the reconfiguration. We propose, in the next section, a hierarchical configuration management architecture that will efficiently enable the handling of the multi-granularity of configuration. 3. HIERARCHICAL CONFIGURATION MANAGEMENT MODELLING 3.1. Config-Data Path Model Communication applications are dataflow oriented; our approach, detailed in [7], is therefore based on a data path model. The functions of the SDR transmitting chain are mapped into several Processing Block Units (PBU). Following this approach, each PBU is optimized using specific reconfigurable hardware resources. In addition, a configuration path, also split into several Configuration Manager Units (CMU), controls the reconfigurable processing path. Each CMU, dedicated to a type of PBU, manages the configuration of a type of baseband function in the chain. The split configuration path offers the possibility to partially reconfigure the transmitting chain by an independent reconfiguration of each PBU. 
The distributed configuration management approach also decreases its design complexity. 3.2. Hierarchical Configuration Management Architecture The section above presented the benefits of the configuration datapath model. It is a base to manage the heterogeneity of baseband processing functions and enable the partial reconfiguration of the SDR transmitting chain. The hierarchical configuration management model illustrated in the Figure 2 is based on the config-data path approach. This model is necessary to manage the multigranularity of configuration required by the different context switching detailed in the section 2.2. It is composed of three levels of hierarchy that are detailed below. Figure 2: Hierarchical Configuration Management Model Hierarchical level 1 The first level is split into 3 entities following the functional classification described in Fig 1. This first high level classification allows a control of category-specific functions to manage parameters at the standard level. The Configuration Manager L1 (L1_CM) works at the standard level as host toward the underlying levels of management. This entity is in charge of choosing the functional units which will constitute the entire configuration of the baseband processing chain. At this level, generic functions are handled as generic components. Any hardware implementation is not yet considered. Hierarchical level 2 The generic functions selected at level 1 are parameterized at level 2 in accordance with standard specifications. The set of attributes of each function is handled by the Configuration Manager Unit L2 (L2_CMU) to create each functional context of the entire processing chain. For example, the generic function "bit to symbol mapping" of the modulation class is set at level 2 to fit the selected standard. In the case of 802.11g standard this setting could be BPSK, QPSK, 16-QAM, 64-QAM constellation. 
In the case of a mode switch, the needed reconfiguration directly involves the L2_CMUs concerned. Hierarchical level 3 The processing data path architecture of this third level will depend on the reconfigurable computing resources of the hardware architecture. The complete processing path could be formed by different types of reconfigurable resources in accordance with the studies detailed in the first section. It could correspond to configurable accelerators, arrays of DSP blocks or a fine grain reconfigurable data path. The main task of the L3_CMUs in the configuration path is to find the available processing resources and configure them to enable the execution of the functional context created at the level above. 4. HIERARCHICAL FUNCTIONAL ARCHITECTURE This section presents a functional hardware architecture made up of three main reconfigurable clusters. After an overview of this heterogeneous architecture, each cluster is described in further detail. This architecture fits the hierarchical configuration management considerations proposed in the previous section. First, it answers the need to handle the multi-granularity of the context switching. Second, the hardware architecture is separated into three main hardware clusters. The cluster resources are specialized according to the classification of the baseband functions into three classes presented in section 2.1 and the required performance. These three clusters have the same general architecture: a cluster is designed around a dedicated central processor with separate data bus and program bus (as in a typical Harvard architecture). This makes it possible to distinguish the configuration data path from the processing data path. The processor unit of the L1_CM controls the three clusters so that they work together. The L1_CM also manages the parameters of the whole application. The configurations of each cluster are controlled by the L2_CM processor units. Each L2_CM is interfaced to a processor designated as master. 
In addition to this master processing unit, some other reconfigurable units are used to speed up the data processing of the cluster. These reconfigurable accelerators are varied, as the baseband functions have different needs. So an L3_CM is associated with each different reconfigurable accelerator. Coding Cluster: Software implementations, for the functions of the coding class, are possible. But hardware will be more efficient and will consume fewer resources. Since data width is only one bit for this class of functions, software implementations are generally under-optimized because they use 16/32-bit processors. Furthermore, hardware implementation is necessary to reach the expected high throughput performance of 54Mbps in 802.11g. On the other hand, facing the wide variety of coding schemes, software flexibility (DSP) is mandatory. Consequently, a coding cluster composed of a DSP aided by reconfigurable co-accelerators, i.e. accelerated through efficient dedicated coprocessors, corresponds to a suitable architecture for this class of processing functions. Data Handling Cluster: The high flexibility offered by the GPP processors is attractive for running the wide variety of handling functions of every standard. The data handling functions perform almost only memory transfers, so a DMA interface is useful between the processor and the memory. The memory is a sizable array of SRAM blocks; the array can be resized by switching off the unused memory blocks, depending on the handling function. Power saving architecture design, for this class of functions, is a key factor, as memory consumes a lot of power. Modulation Cluster: Most functions handle data, up to 32 bits, which takes a lot of resources. Consequently, some of these functions, like pulse shaping filtering, will take advantage of an implementation in dedicated configurable hardware accelerators. 
One other point is the intensive processing requirements of the Modulation class functions which often necessitates a HW implementation. The main goal of this model is to help defining features needed by the hardware platform for SDR applications. The Fig 3 depicts the model as an architecture with lots of duplicated resources (multiple microprocessors and buses). This just illustrates, for a good understanding, our hierarchical view of the hardware platform. Fig 3: Hierarchical functional architecture For instance, in the CM part of the model, the multiple microprocessors (called µP) are instantiated as multiple tasks running on a processor of the hardware platform. The next section illustrates in a more detailed manner the effective implementation of this functional architecture in a real platform. 5. HETEROGENEOUS HARDWARE SDR PLATFORM The use of reprogrammable (DSP), reconfigurable (FPGA) devices [8] and more generally reconfigurable computing architectures is commonly admitted for Software Radio. We present the heterogeneous (GPP/DSP/FPGA) platform and especially FPGA reconfiguration requirements to enable the implementation of our model. We focus on the FPGA, because it is foreseen as a good intermediate architectural possibility between DSP and ASIC. FPGA combines the flexibility of DSP and the throughput efficiency [9] of ASIC. But this really makes sense only if we can efficiently manage reconfiguration of FPGA. Mandatory requirements are partial and dynamic capabilities of reconfiguration for FPGA. Currently, a few FPGA devices enable dynamic and partial reconfiguration [10]. Moreover, the system architecture [11], [12] is a key factor to make embedded these kind of reconfigurations. 5.1 From the Architecture Model to the platform. The implementations on the platform are currently under development. We present in this part the platform and the drawn up implementations. 
Our prototyping platform is composed of a GPP, a DSP, a FPGA and different types of memories. The functional architecture of the Fig 3 is mapped into this platform as it is illustrated in the Fig. 4. It consists in gathering on each category of HW processing device (GPP, DSP, FPGA) on the one hand the PBU (in white on Fig. 4) and on the other hand the CMU depending on their hierarchical level. configuration of the functions that run on the cluster resources. The FPGA of our platform is a Virtex -II device from Xilinx. The following components of the functional model: the hardware accelerators, the small co-accelerators and the sizable array of blockRAMs are mapped into the FPGAs. The partial reconfigurability is of course a mandatory features to allow reconfiguration of a single component. The L3_CMUs responsible for the configuration of the coaccelerators are implemented as task into the µBlaze soft processor. It allows to perform fine grain reconfiguration of the FPGA without involving any external resources. Next section details furthermore the internal architecture of the FPGA to enable the partial dynamic reconfiguration. Components Library: The Hardware and software designs of the processing functions are stored in the external storage memory of the platform where the configuration management takes them. 5.2 FPGA architecture on the embedded platform. At the initial stage the CM of the Host (GPP) downloads the DSP boot program that includes in its data memory the initial full configuration of the FPGA. It consist in the FPGA design architecture. It includes an internal configuration controller (µBlaze soft processor), the internal reconfiguration interface (ICAP), the initial instantiations of PBUs and the communication interfaces with the DSP. Fig. 4: Platform architecture The GPP and the external storage memories are resources used from a standard PC station. The GPP is the host of the rest of the platform. 
The L1_CM is a task running on the GPP which controls the configuration of the other platform resources (DSP, FPGA). The control-oriented functions, such as some Data Handling functions, take advantage of the flexibility of the GPP. The DSP is a C64 from TI. The DSP parts of both the Modulation and Coding clusters are tasks running on the C64. The DSP works as the master of the (DSP/FPGA) subpart of the platform. The L2_CMUs of the three clusters are then also mapped on the DSP. The position of master allows the DSP to manage the overall Fig. 5: Details of the FPGA architecture Fig. 5 illustrates this platform architecture with details of the internal FPGA design that enable two types of dynamic partial reconfiguration depending on their granularity level. One stays internal in the case of limited-scale reconfiguration (for co-accelerator configuration) or design parameterization (auto-reconfiguration). This implies interconnecting the µBlaze to the ICAP internal configuration interface. This kind of reconfiguration of the FPGA by a processor (µBlaze) embedded in the FPGA is called self-reconfiguration or auto-reconfiguration [13]. In this case, small partial bitstreams are stored inside the FPGA, and the use of auto-reconfiguration leaves the other HW resources of the platform free. At a larger scale, reconfiguration for the HW Accelerator is external. This implies interconnecting the DSP to the external SelectMap or internal ICAP reconfiguration interfaces. The bitstreams corresponding to the design of the HW Acc. are stored in an external SRAM memory. 6. CONCLUSION Through the proposed models, this paper shows the tradeoffs involved in providing full reconfiguration flexibility with reasonable hardware complexity, from configuration management to the hardware architecture. Indeed, the hierarchical functional architecture offers maximum reconfiguration flexibility. It allows as much flexibility as possible to be drawn from any kind of heterogeneous platform. 
As this functional architecture is not specific to any platform or device, it will improve itself and will easily take advantages of the future evolutions of reconfigurable computing technologies in terms of flexibility. Following this idea we already achieve to take into account existing limitations concerning the partial reconfiguration of FPGA. 7. REFERENCES [1] J. Mitola, "The software Radio architecture," IEEE Comms Mag, vol. 33, no. 5, pp. 26--38, May 1995. [2] C. Moy, A. Kountouris, A. Bisiaux, "HW and SW Architectures for Over-The-Air Dynamic Reconfiguration by Software Download," SDR Workshop of the IEEE Radio and Wireless Conference, Boston, USA, Aug. 2003 [3] J.P. Delahaye, G. Gogniat, C. Roland, P. Bomel, "Software Radio and Dynamic Reconfiguration on a DSP/FPGA Platform," 3rd Karlsruhe Workshop on Software Radios, proc. pp 143-151, Karlsruhe Germany, March 17-18 2004. [4] "Virtex Series Configuration Architecture User Guide," Xilinx, Inc., 2100 Logic Drive, San Jose, CA 95124,XAPP151 (v1.6) March 24, 2003 http://www.xilinx.com/bvdocs/appnotes/xapp151.pdf. [5] J. Palicot, C. Roland, "FFT a basic Function for a Reconfigurable receiver," ICT Conf., Tahiti, 2003. [6] DeHon, J. Adams, M. DeLorimier, N. Kapre, Y. Matsuda, H. Naeimi, M. Vanier, and M. Wrighton., "Design Patterns for Reconfigurable Computing," fccm, pp. 13-23, 12th Annual IEEE Symposium on Field-Programmable Custom Computing Machines (FCCM'04), 2004. [7] J.-P. Delahaye, J. Palicot, P. Leray, "A Hierarchical Modeling Approach in Software Defined Radio System Design," SIPS 2005, Athens-Greece, Nov. 2005. [8] M. Cummings, S. Haruyama, "FPGA in the Software Radio," IEEE Comms. Mag., vol. 37, no. 2, pp. 108-112, Feb. 1999. [9] T. Claasen, "High Speed: Not the Only Way to Exploit the Intrinsic Computational Power of Silicon," Digest of Tech. Papers ISSCC 99, IEEE Press, pp. 22-25, 1999. [10] S. Donthi and R.L. Haggard, "A survey of dynamically reconfigurable FPGA devices," Proc. 
IEEE Symposium on System Theory, pp. 422-426, 2003. [11] M. Ullmann, M. Huebner, B. Grimm, J. Becker; "An FPGA run-time system for dynamical on-demand reconfiguration," 18th International Parallel and Distributed Processing Symposium, 2004. Proceedings.pp.135, 26-30 April 2004. [12] J.C. Ferreira, M. M. Silva, "Run-time reconfiguration support for FPGAs with embedded CPUs: The hardware layer," Proc. of the 19th IEEE International Parallel and Distributed Processing Symposium (IPDPS’05). [13] B. Blodget, P. James-Roxby, E. Keller, S. McMillan and P. Sundararajan. A Self-reconfiguration Platform. In proceeding of 13th International Conference on Field- Programmable Logic and Applications, FPL’2003, pp. 565-574. Sept. 2003, Lisbon, Portugal. Partial Reconfiguration of FPGAs for Dynamical Reconfiguration of a Software Radio Platform Jean-Philippe DELAHAYE, Jacques PALICOT, Christophe MOY, Pierre LERAY IETR/Supélec - Campus de Rennes Avenue de la Boulaie, CS 47601, 35576 Cesson-Sévigné CEDEX, France phone: + (33) 2 99 84 45 00, fax: +(33) 2 99 84 45 99 Email:<jean-philippe.delahaye,jacques.palicot,christophe.moy,pierre.leray>@rennes.supelec.fr Abstract— In the context of a Cognitive Radio Terminal we propose to use Partial Reconfiguration of FPGAs in order to obtain a enhanced wide band Software Radio Platform. That means the reconfiguration should be performed dynamically (during the run time). Only Partial Reconfiguration of FPGAs could currently meet this requirement both in terms of power computing efficiency and reconfiguration speed. In this paper we show how it is possible to perform this type of behavior within an heterogeneous platform. We consider in detail three different situations illustrated with the three following examples: the dynamical reconfiguration of a convolutional coder, a constellation mapper, and a FIR filter. I. 
INTRODUCTION Within the framework of the European IST End to End Reconfigurability phase two (E²R II) project, we propose to perform dynamic reconfiguration using FPGAs. This behavior is included in a complete baseband platform developed by other partners of the project. New wireless transceivers [3] based on the Cognitive Radio (CR) concepts [1] will be feasible when true wideband Software Radio technology [2] becomes available. This implies a lot of signal processing performed in real time. Another important requirement is that some functions should run during the reconfiguration of other functions. Dynamically reconfigurable FPGAs can provide this feature because they offer a new design space with a variety of benefits, among others: flexibility and reusability at run time. Dynamic reconfiguration is closely related to the partial reconfigurability of FPGAs [12]. Indeed, partial reconfigurability allows segments of an FPGA's functionality to be selectively changed without suspending the operation of the remaining parts. There are several benefits of partial reconfiguration (PR): it reduces the configuration time and saves storage memory, as the partial reconfiguration files (bitstreams) are smaller than full ones. It may also limit the overhead and the bandwidth spent on code downloading in the case of over-the-air reconfiguration (OTAR). Reconfigurable computing [4], [5] has been proposed in a large range of signal processing applications in order to improve performance, flexibility and adaptability. The development of wireless communication systems has revealed the need to dynamically adapt system architectures at the hardware level, as in Software Radio (SWR) systems [6]. [7] proposed a first vision of an SWR transceiver using dynamical reconfiguration. PR on a Virtex-E FPGA was described in this work. The PR has been extended at the system level for the Fig. 1. Different types of dynamic reconfiguration. heterogeneous SWR platform in [10]. 
In this paper we go further, describing several implementations of partially reconfigurable processing modules using the partial reconfiguration capability of the Virtex-4. The system architecture that should be adopted to achieve the PR is also considered here. Furthermore, in this paper we address new PR capabilities of the Virtex-4 FPGA, compared with Virtex-II. They provide more flexibility and enhanced reconfiguration performance. Dynamic reconfiguration is used to share a group of configurable elements between several processing contexts. This is time multiplexing of resources. We can distinguish two types of dynamic reconfiguration, which are illustrated in Fig. 1. Figure 1.1 shows the case of dynamic reconfiguration with data dependency between the functions T2 and T3 that share the same reconfigurable resource area. In this case, it is necessary to ensure temporary storage of the data. Figure 1.2 shows the use of dynamic reconfiguration without data dependency between two functions. For example, the second case could occur when the transmission environment changes [11], (signal-to-noise ratio, available power, etc.), and when a more efficient function or a less power consuming one is required. This situation occurs classically in Cognitive Radio systems. The first case may happen when processing blocks of a large function are multiplexed. This is useful to reduce the area of the function when needed. We have presented in [13] an application that takes advantage of the first type of dynamic reconfiguration. We focus in this paper on the second type of dynamic reconfiguration. The rest of the paper is organized as follows. The state of the art of reconfiguration of SWR platforms is given in section II. The need for partial reconfiguration is also highlighted. Section III deals with the management of Partial Reconfiguration. Section IV describes implementation alternatives through three examples of PR modules. 
In this section the general architecture of a PR module is also provided. The conclusions are stated in section V. II. STATE OF THE ART OF THE PARTIAL RECONFIGURATION A. System Architecture for the achievement of Reconfiguration Several alternative system architectures are already possible to ensure the dynamic partial reconfiguration of FPGAs. Depending on the FPGA reconfiguration capabilities, reconfiguration can be done in two different ways: external or internal reconfiguration. It depends on both the reconfiguration interface and the reconfiguration controller available on the platform. An overview of the corresponding architectures is proposed in Fig.2. In Fig.2.1 the configuration controller is an external processor connected to the external configuration interface of the FPGA: the SelectMap port. In the second architecture, both the controller and the interface are internal. The configuration controller is an embedded processor, and the configuration interface is the Internal Configuration Access Port (ICAP). In both cases, the logic resources are separated into two types of areas, the fixed logic and the dynamically reconfigurable area. Each of these two parts may contain several modules. On the one hand the fixed area contains modules whose behavior never changes. On the other hand the reconfigurable modules should be designed in a column (rectangular area) as the reconfiguration on Virtex-II is done by reloading entire columns of CLBs. The interconnection of a reconfigurable module with another reconfigurable or fixed module has to be realized through a special communication interface called a Bus Macro. These Bus Macros provided by Xilinx are used to ensure the correct placement and routing of signals crossing over a PR area, as explained in [7]. The FPGAs are based on an SRAM Configuration Memory (CM) (or configuration plane). This is the plane usually used to configure the FPGA at boot time. 
The CM of the Virtex-E/II FPGA series allows the creation of partial bitstreams containing the configuration data of the whole height of a CLB column. One of the most important enhancements in terms of PR capability on the Virtex-4 series is that the CM of Virtex-4 is a Block Frame Based configuration memory. It allows any rectangular reconfigurable area to be designed, even smaller than a column. The size of the bitstream may be greatly reduced as just the block frames belonging to the PR area are included in the bitstream, rather than at minimum all the column frames as on Virtex-II. It increases the flexibility of design, and the performance of PR as well. The bandwidth of the V4 ICAP is also 8 times greater than the ICAP bandwidth of Virtex-II, so the reconfiguration time is reduced. As the V4 FPGA allows rectangular PR areas to be designed, Xilinx provides a new version of its Bus Macro for both horizontal and vertical signal Fig. 2. Fig. 3. Macro Two architectures to perform PR on FPGA. Rectangular PR area on Virtex-4 with horizontal and Vertical Bus interconnections. These new versions of Bus Macro are based on pre-routed and pre-placed LUTs as shown in Fig.3. B. The need of Partial Reconfiguration In [8], we identified that a multistandard device should perform a lot of functions. These functions can be classified in three classes as detailed in Fig. 4. Each class of functions is either memory-oriented, computation-intensive or needs implementation flexibility to answer the wide range of coding patterns. Each cluster architecture is oriented depending on the processing requirements of the different classes of functions. We have proposed in [10] a system architecture based on these clusters. Moreover, each cluster is characterized by the type Fig. 4. Classification demands of Computing Resources. of operation (the data size, the memory, the frequency, the computing resources required). 
In fact, it is feasible to share part of the processing resources between several standards at the price of the use of reconfiguration. But as standards have functions in common, only some sub-parts of the standards need to be changed when switching standards. So Partial reconfiguration is an essential feature of SWR systems. But this feature has only been reserved to processors until now: changing a sub-part of the executable code of a DSP, for instance as in [14]. We show hereafter how this concept of PR may be generalized to the reconfigurable HW world. III. MANAGEMENT OF THE PARTIAL RECONFIGURATION The presence of a Xilinx V-4 FPGA on the E²R II demonstrator offers the opportunity to implement partial reconfiguration of FPGA, which is of great interest for future SWR systems design. The PR of the Virtex-4 is used on the platform as follows: each processing function/module potentially contains sub-parts which will be reconfigurable to enhance the flexibility of the function. These dynamically reconfigured sub-parts are mapped into reconfigurable areas in the form of PRMs (Partially Reconfigurable Modules). The PR implementation requires a connection to the ICAP in the chosen context of auto-reconfiguration as shown in Fig.2.2. Auto-reconfiguration means that the reconfigured areas of the FPGA (PR Regions) are managed by the FPGA itself, through an embedded processor, such as the soft-processor MicroBlaze provided by Xilinx. The latter could be much simpler, such as a state machine, but the MicroBlaze will also be responsible for other management tasks. For instance, the MicroBlaze could also be the control interface with the NoC network, as in solution 1 of Fig. 5 presented hereafter. In the context of the E²R II platform, the PR processing modules inside the FPGA are linked to the other processing modules of the board through the NoC network which is extended from the CEA Faust SoC [9] to the FPGA. 
Several solutions can be considered to connect the PR processing modules to the NoC network. Two of them are suggested below to illustrate the different approaches of the insertion of PR processing modules in the processing chain of Fig. 5. Solution 1 for the architecture of Virtex 4 and its interconnexion with NoC from CEA [9] the SWR platform. Note that in both cases, the reconfiguration link between the MicroBlaze and the processing modules is hidden to the FPGA logic resources plane since it goes through ICAP into the configuration plane. A. First Solution In solution 1 of Fig.5, the NoC [9] is connected to the PR architecture through the MicroBlaze while the PR subparts of the system remain quite independent. The MicroBlaze manages on the one hand the data transfers coming from (respectively going to) the NoC towards (respectively from) the processing modules with PR capabilities. On the other hand, it also manages the configuration of the PR Region inside each PR processing module. The PR Regions are highlighted by additional red rectangles in the diagrams. The PR region will support the reconfigurable tasks which will allow to change the functional behaviour of the processing module. Three configurations are drawn on this figure for PR Region A: PRM A0, PRM A1 and PRM A2. PRM Ai are the bitstreams corresponding to the PR Region A. This results in three potential processing capabilities for the corresponding processing module. They can be changed at run-time, in other words while the rest of the FPGA still runs. Depending on the reconfiguration duration, data should be stopped at the entrance of a PR processing module under reconfiguration. The reconfiguration tasks could also be chosen in an idle time between two data buffers computation. B. Second Solution Main drawback of solution 1 is that the MicroBlaze risks to be overloaded by data transfers activity. Consequently, a second solution is proposed in Fig. 6. 
Solution 2 includes the processing modules supporting PR in the NoC architecture itself, in the sense that each PR processing module is connected independently to the NoC for data transfers. A MicroBlaze is still necessary as one of the nodes of the NoC. Its role here is only to manage the PR Region bitstreams. No specific connection is necessary between the MicroBlaze and the dynamically reconfigurable processing modules since Fig. 6. Zoom on Virtex 4 architecture for solution 2. PR is operated through the ICAP and the configuration plane of the FPGA. This physically separates data transfers from control transfers on two different communication means. This is a great advantage in order to avoid contention between data and reconfiguration transfers. IV. EXAMPLES OF PROCESSING MODULES A. General architecture of a PR processing module Fig. 7. Partially Reconfigurable Constellation mapper to change the coefficients of the FIR. Consequences are more important than expected at first glance. This approach avoids designing and consuming communication resources to access several sets of coefficients. This includes address and data buses for RAM access. ROM is directly reconfigured through the configuration plane. Even concerning a common functionality, many considerations can be taken into account, and consequently lead to several implementations. Depending on the nature of the processing module, the PR region (red rectangle) may support functionally different subparts of a module, as for instance: • for a FIR: coefficient storage zone, • for a FIR (another implementation): controller that manages the FIR length, • for a convolutional coder: routing paths, • for a constellation mapper: fully reconfigured. This set of functions has been chosen to explore as many situations as possible. We propose to detail each of these cases hereafter. 
The general architecture of a PR processing module includes a PR Region, which supports the PRM part of the module, but also a fixed control part and a fixed processing part. B. Constellation Mapper In a PR Region, it is firstly possible to consider the implementation of different tasks occupying the same size in terms of configurable elements. This is a priori the situation we can expect when thinking of the PR of FPGA. But this is only a specific situation from which many others can be derived. This is illustrated by Fig. 7 in the context of a constellation mapper. Mapping from BPSK, to 16 QAM, 64 QAM, etc. is directly provided in the bitstreams of the PRM. This is a module-based PR Region that needs to be connected to other design areas through Bus Macros. C. FIR Implementations Fig. 8 refers to the case of a FIR whose coefficients must be changeable. Here it is the set of coefficients that is located in the PR Region. FIR coefficients are stored in FPGA memory blocks configured as ROM. The ROM blocks are reconfigured Fig. 8. FIR implementation 1 for pulse-shaping flexibility. Fig.9 for instance takes again the example of a FIR, but with a different goal than in the previous figure: changing the length of the FIR. This includes changing the coefficients (as in the previous case), but also a sub-part of the control finite state machine that manages the number of calls of the 4-tap FIR, and a set of registers to memorize some temporary data. These three items are then included in the PR Region. D. Convolutional Coder Implementation Fig. 10 shows how this is implemented for the example of a convolutional coder. Here, PR is used to configure a TABLE I BITSTREAMS SIZE AND RECONFIGURATION TIME ON A VIRTEX-II 2000 IP Bitstream Size Reconfiguration Time Fully Reconf. FIR 74.7 kB 1.5 ms PR FIR 25.7 kB 510 µs PR Conv. coder 2 kB 40 µs of the FPGA which is about 16 ms for the xc2v2000 device. 
This stresses that many design choices may impact the size of the bitstream and consequently the PR efficiency, as a function of the design effort. But this issue is out of the scope of this paper. Fig. 9. FIR implementation 2 for filter length flexibility. reconfigurable Switch Box (RCSwb) that allows changing the polynomial generator of the coder. The aim is to obtain very small bitstreams for minimum memory occupancy and minimum reconfiguration time. V. CONCLUSION We propose in this paper to take advantage of PR in order to enhance current SWR and future CR system capabilities. FPGA PR indeed makes it possible to bring run-time reconfiguration to the reconfigurable HW world. We propose a general architecture for the design of PR modules, and illustrate it with three examples. The next step of this work is to validate this concept within the platform of the E²R II project. VI. ACKNOWLEDGMENT This work was performed in project E²R II which has received research funding from the Community’s Sixth Framework programme. This paper reflects only the authors’ views and the Community is not liable for any use that may be made of the information contained therein. The contributions of colleagues from the E²R II consortium are hereby acknowledged. REFERENCES Fig. 10. Convolutional coder with a dynamically reconfigurable switch box. As previously stated in the other examples, the sending of new polynomial values does not require the use of a dedicated communication means for configuration data transfer. This is automatically operated through the configuration memory plane which makes it possible to define the FPGA run-time design. E. Partial Reconfigurations Results Table I gives some results and orders of magnitude in terms of partial bitstream size and reconfiguration duration for the listed PR IP on a Virtex-II 2000. The results given in table I are highly dependent on the chosen design methodology. 
As shown in table I, adopting a design methodology to partially reconfigure a sub-part of the FIR (the PR FIR) significantly reduces the reconfiguration time (510 µs) compared with the IP that is fully reconfigured in 1.5 ms. Note that even the latter case is 10 times better than a full configuration [1] J. Mitola, Cognitive Radio, Ph.D. dissertation, Royal Inst. of Tech., Sweden, May 2000. [2] J. Mitola, ”The software Radio Architecture”, IEEE Communications Magazine, May 95, pp. 26-38. [3] Christian Roland, Jacques Palicot, ”A New Concept of Wireless Reconfigurable Receiver”, IEEE communications Magazine, july 2003,pp. [4] K. Compton and S. Hauck, Reconfigurable computing: a survey of systems and software. ACM Comput. Surv., vol. 34, no. 2, pp. 171–210, 2002. [5] R. Hartenstein, A decade of reconfigurable computing: A visionary retrospective, date, vol. 00, p. 0642, 2001. [6] M. Cummings and S. Haruyama, Fpga in the software radio, IEEE Communications Magazine, vol. 37, no. 2, pp. 108–112, Feb 1999. [7] J.P. Delahaye, G. Gogniat, C. Roland, P. Bomel, ”Software Radio and Dynamic Reconfiguration on a DSP/FPGA Platform”, Frequenz n58(2004)56, Germany. [8] J.P. Delahaye, J. Palicot, P. Leray, ”A Hierarchical Modeling Approach in Software Defined Radio System Design”, IEEE Workshop on Signal Processing Systems, SIPS 2005, Athens (Greece), November 2005. [9] R. Rabineau, D. Lattard, Y. Durand, M. Lobeira and JP Rossi, ”Flexible Test-Bed for B3G Systems”, IST mobile summit 2006. [10] J.P. Delahaye, C. Moy, P. Leray, J. Palicot, ”Managing Dynamic Partial Reconfiguration on Heterogeneous SDR Platforms”, Sdr Forum 2005, November, Los Angeles, USA. [11] J. Liang, R. Tessier, and D. Goeckel, A dynamically reconfigurable power-efficient turbo decoder. in FCCM. IEEE Computer Society, 2004, pp. 91–100. [12] Xilinx, Early access partial reconfiguration user guide, ug208, 2006. [13] H. Wang, J.P. Delahaye, P. Leray, J. 
Palicot, ”Managing dynamic reconfiguration on MIMO Decoder” in 14th Reconfigurable Architectures Workshop, RAW 2007, Long Beach Ca, USA. [14] Apostolos Kountouris, Christophe Moy, ”Reconfiguration in Software Radio Systems”, Second Karlsruhe Workshop on Software Radio, March 2002. '- $ "#0 - 504 - " , '< 0 '#0. .$ ,$ # #$ 03 1$ $ ! 1 " '- '- " '- 504 $ "#0 - * - & & .$ ,$ # #$ 03 c =d% % ) G c ;d , / 92 0 9 & H< G '- '- " G H< G ) ! A < 1 " % %. 5$ #0, 5$ #0, 1$ $ ! 6 9 % ) ( * < H 6 9 < H Decide Sense Adapt % ) % ) ) .! 4M :"% " . + O K *L3 &5@ 5 & 2 $ %+# 3 ;5 % %. 6 2 2 &' < , 9 9 9 ( & 6 6 < '9 9 6 6 96 G > ( & 9 H < K 6 D , / ( 6 / c ( d$ 9 % ) 9 6 6 − > 2 % >( 6 < − )< 9 < − " , '< 0 '#0. ? '- $ "#0 - 504 - .$ ,$ # #$ 03 1$ $ ! 1 " '- '- " smart sub-system analysis stimuli learning user SDR communication sub-system network environment Application layer sensing means Multiple physical layers Multiple Multiplephysical physicallayers layers electro-magnetic environment '9 M 7! $ adapting means … hardware ) .! orders decision & % %. &! % & . A 96 , # 3 > 6 6 6 2 9 9 9 >6 6 2 > > / 6 c =d > ' $ 6 0$ % "& ! 7 7 7 > 2 > 9 9 &;% M P& # # "& ! & '9 ? "$ % %. > 9 > 1 , - 4 6 > , % >6 6 69 6 9 > -1' ) ' 2 / , 5 %1 % " < 9 & N) & , U !01 -1 5 6 9 ( & 1 >& * 2 + > 6 / ! ) , '< 0 '#0. " % % & '- 504 $ "#0 - - 9 > .$ ,$ # #$ 03 ( 9 9 Level 1 L2_CRMU reconfiguration orders L2_ReMU (M 0 %& " ! . ' reconfiguration orders (parameters) "! ". 0 . ' = ( > 2 > , g' 5% ) , > < 9 > ( > > +.! & / > , g 5% ) 6 > 9 6 ( 9 metrics, notifications Level i Li_CRM(U) Level i-1 * M )% " towards next PBU > Level i+1 ) .! c ( d L3_ReMU metrics or reconfiguration sensing orders information processing block unit (PBU) from previous PBU '- " network reconfiguration orders L1_ReM L3_CRMU '- 2 L1_CRM Level 2 1 " 9 reconfiguration orders Level 3 ) .! 2 < network metrics high level metrics or sensing information 1$ $ ! 
reconfiguration orders reconfiguration orders Li_ReM(U) metrics, notifications #" & . " % #% #$ ". ' , '< 0 '#0. reconfiguration orders %& " "! . ? '- $ "#0 - 3 504 - 2 69 .$ ,$ # #$ 03 1$ $ ! 1 " 9 > > 9 6 2 9 >< 9 P $ 9 6 9 9 9A / 2 % =" 9 , )< 5, +K 9 2 6 < 2 5, Q 6 < " 6 < 6 9 $ 3 $ c = d T A 6 > & & , / 5, ; + : 2 > c ( =d < 6 9 6 & 5, < T + , U !01 1< 6 < T ,M # &$ 2 /% %& " " "! , '< 0 '#0. 6 < >6 > 9 # ) .! / 6 & > 3#, 5 + & 5 ' ! 6 5, -1' ? '- " -1 5 J # 6 9 + '- . ". ' ' 2 '- $ "#0 - ' 504 + - & K 9 ( 96 2 >6 1 9 > 1 " 6 $ 9 9 > 6 K & < >6 , I >6 Q 9 9 ( 9 ( 2 / @ : L1_CRM : L1_ReM Order: (None, None) Info: Null : L3_ReMU ISI_capture T > 6 96 9 ) Order: None Info: Wait for entry : L2_ReMU_func_fusion : L2_CRMU_func_fusion Order: (None, None) Info: Null Order: None Info: Wait for entry : L3_ReMU Equalizer : L3_CRMU ISI_capture Order: None Info: transparent Order: None Info: Wait for entry Order: None Info: Wait for entry Operator Equalizer Metric value: none State: activated Metric value (%) : 75 ) .! >6 %,=g 5 ) >6 -M "# & #" $ ;& K = %,=g' 5 ) , 2 6 9 9 > < > 2 9 0 A >( 2 + 9 9 2 < 69 9 $" 9 $ "< , g' 5 9 9 + 9 : L3_CRMU Equalizer Order: None Info: Null Operator ISI_capture 9 9 6 6 , ,=g' 5 A 1 6 ( 6 >6 92 9 > 9 %$ ") , 9 $ " %$ "$ 96 < '- " & K +K '- 9 2 & 1$ $ ! < ( < & .$ ,$ # #$ 03 D ( K < 9 6 9 9 < 2 / 1 K " 9 > , '< 0 '#0. @ % ?= '- 504 $ "#0 - >M 6 9 K 3 .$ ,$ # #$ 03 6 < 9 '- 9 $" 9 ( '- " > 6 9 + & 6 6 < 2 K & & & >6 9 ( & , < K 6 2 2 $ 9 < 6 9A > 6 > 1 " ) L > 1$ $ ! 6 < 9 ;= , 9 > 9 < >&6 >6 ( ( > 6 3 9 > 9 ' K $ $ 9 9 9 < A 6 < 6 +K 6 > 2 69 & ' > & > < 6 6 & 3 D A A 6 A > > 0 5 9 > 9 0 A > -1' < < M > 9 > < 92 >6 9 6 , 6 J 9 2 >& & 6 9 + + A ( > *9 > 6 , >& A 9 1", < A 9 6 ) > > 6 ) & A9 ,9 9 * 5$ #0, B A 2 6 T , %) VV [< c =d 2 9 K A $ # >& ' ' ' % ?; , U !01 6 & c = d 9 9 2 %$ 6 ' ' :)< ) ' T 2 " > 1 < C ' 6 , '< 0 '#0. 
2 0 Z > Z X > + & " 3 C X C > CrownCom'06 – First International Conference on Cognitive Radio Oriented Wireless Networks and Communications< From a Configuration Management to a Cognitive Radio Management of SDR Systems Loïg Godard, Christophe Moy, Jacques Palicot Abstract—This paper proposes a functional management architecture for Cognitive Radio systems. It relies on a previously defined configuration management architecture for multi-standard SDR systems, and complement it to support cognitive radio features. This paper explains the requirements of Cognitive Radio systems in terms of reconfiguration, smartness and sensing capabilities. A configuration management architecture capable of dealing with the hardware heterogeneity and a wide range of reconfiguration scenarios expected with SDR systems is presented. The management is distributed over the system and a hierarchical dependency is set on 3 layers, each having a different level of knowledge of the system and the associated hardware constraints of the elements it supervises. Then a cognitive management functional architecture is derived from the previous one, copying the 3 layers of hierarchy. The roles of the elements of each layer are discussed, as well as their respective interactions and their relationships with the elements of the configuration management architecture. induced by SDR usage scenarios on the other hand. As detailed in [5] a CR equipment should take decisions based on information given by sensors on the one hand and on rules and self learning capabilities on the other hand. These considerations have direct consequences on the hierarchical configuration management of [4]. In fact each management level should be driven by an associated cognitive module. That is why we propose, in this paper, a distributed hierarchical cognitive reconfiguration management for SDR systems. The remainder of the paper is organized as follows. In section II, we describe properties of Cognitive Radio systems. 
Then we deal in section III with the hierarchical configuration management architecture. In section IV, we describe our proposal for a hierarchical cognitive management mapped on the previous configuration management architecture. It is particularly stressed how these two hierarchical approaches are inter-related. Finally, in section V we conclude this work. Index Terms—cognitive radio, cognitive management, configuration management, multi-standard terminal II. COGNITIVE RADIO SYSTEMS I. INTRODUCTION THE number of baseband processing contexts of a Software-Defined Radio (SDR) system [1][2] rises increasingly as the number of standards to handle in a single handset increases. Consequently, configuration management becomes a mandatory part of a reconfigurable system [3]. But most of the proposals in reconfiguration management did not consider both aspects of the reconfiguration: hardware reconfiguration capabilities, and reconfiguration schemes of the application. It is necessary though to adapt the reconfiguration mechanisms of hardware resources to the switching needs of the applications. This reconfiguration adequacy is essential to reduce reconfiguration overhead. We proposed in [4] a first step of such a global approach of reconfiguration management of multi-standard SDR systems. We concluded that reconfiguration management should be distributed and hierarchical, in order to support both HW heterogeneity and its associated reconfiguration capabilities on the one hand, as well as the reconfiguration schemes Manuscript received February 28, 2006. L. Godard, C. Moy and J. Palicot are with IETR, UMR CNRS 6164, Automatic and Communication lab / Supélec SCEE team, Rennes, France (+33 2 99 84 45 00; fax: +33 2 99 84 45 99; e-mail: [email protected], [email protected], [email protected]). A. 
A reconfigurable radio with sensing means The Cognitive Radio concept, here tackled in its widest and most unrestricted sense, refers to a communication system that is able to observe its environment, analyze it, and react to it in some way, as illustrated in Fig. 1. A CR system is a smart SDR system. The target of multistandard radio is of particular interest for this approach. In addition to its multiple communication parts, the CR system indeed features sensing means, adapting means and a smart sub-system dedicated to analyzing stimuli and making decisions [6]. Sensing means refer to all the possible methods the CR system has at its disposal for observing its environment, which can be categorized in four main families described below: electromagnetic environment: spectrum occupancy, Signal to Noise Ratio (SNR), multi-path propagation, etc. hardware environment: battery level, power consumption, computational resources load, etc. network environment: telecommunication standards (GSM, UMTS, WiFi, etc.), operators and services available in the vicinity, traffic load on a link, etc. user-related environment: position, speed, time of day, user preferences, user profile (access rights, > CrownCom'06 – First International Conference on Cognitive Radio Oriented Wireless Networks and Communications< contract…), video and audio sensor (presence detection, voice recognition), etc. Ele Ha ctr rd om wa re ag net ic Sensors Smart sub-sytem Adapting means SNR Spectrum occupancy propagation Memory capacity im u li rd O Analysis Power consumption Services Us Ne ertw rel ork ate d St er Decision Learning Standards Standard choice Audio / video alert Beamforming Decoder choice positionning Video Speed, position profile Audio Fig. 1 - Schematic functional view of a Cognitive Radio system As shown in Fig. 
1, all the stimuli received by the sensors from the different layers (from physical to application) provide metrics that must be merged and analyzed jointly in order to reach the best possible reconfiguration decision. Reconfiguration is supported by a software radio or at least a SDR architecture, which integrates both adequate HW capabilities and SW architecture. Our previous research led to the definition of a hierarchical configuration management [4], whose major features are recalled in part III. B. Smartness To make a reconfigurable radio become a CR, it is necessary to add some smartness in order to make it self-aware [5]. We assert this smartness itself needs also to be distributed all over the radio system. It may be foreseen indeed that a CR will have to face high-level decisions as well as local decisions. High-level decisions are to be understood in the sense that network considerations or constraints on the overall CR system may take benefit from reconfiguration. This is what is considered in End-to-End Reconfigurability [7]. Local decisions may concern for instance the reconfiguration of a single digital processing block unit (PBU) in order to adapt its behavior to a specific context (increase or decrease the number of fingers of a RAKE receiver depending on the number of paths of the propagation channel). But high-level decisions moreover should impact the low-level layers of the system through the low-level decision entities, which leads to the notion of a hierarchical repartition of the smartness through the CR system. In a CR system, it is consequently straightforward to merge the cognitive management we consider here, with the previously defined configuration management of part III, due to their identical features in terms of distribution and hierarchy. This paper is all about describing the combination and the relative interactions between them. III.
RECONFIGURATION MANAGEMENT ARCHITECTURE The great diversity of processing types in a multi-standard application implies a large number of processing configurations to be managed. The configuration management is complex and we believe that a hierarchical approach of configuration control and management could simplify it [4]. The benefits of clustering the transmission chain have been presented in [8], particularly to manage partial reconfiguration of an application. In addition to this, the proposal of a hierarchical view enables to manage multi-granularity of configurations, which is of particular interest for heterogeneous architectures. The proposed model is composed of 3 levels of hierarchy detailed in Fig. 2. A system architecture compliant to this functional model includes one Configuration Manager at level 1 (L1_CM), several Configuration Manager Units at level 2 (L2_CMU), each of them being responsible of one or several Configuration Manager Units of level 3 (L3_CMU), which directly manage the processing components. Level 1 Config. Manager L1_CM Standard Set Std 1 _____ _____ _____ Std 2 _____ _____ _____ Std 3 _____ _____ _____ Standards Parameters Level 2 L2_CMU Function Set … Function Set … Function Set L2_CMU L2_CMU Functions Library Level 3 L3_CMU Block Set L3_CMU … L3_CMU Block Set … Block Set HW/SW Blocks Library Configuration Management Fig. 2 – Hierarchical distributed configuration management functional architecture A. L1_CM There is one L1_CM in the system. L1_CM is the orchestra's chief. It has the global knowledge of the state of the system thanks to databases. It can update these databases with the information it receives from the network on the one hand, and from the system itself on the other hand. In the latter case, reconfiguration procedure comes up to L1_CM as soon as several L2_CMU are concerned. The L1_CM watches over the coherency and the synchronization of the orders between the involved L2_CMU. 
L1_CM gives abstract orders of reconfiguration to the underlying L2_CMU when this is needed, as for a standard hand-over for instance. "Abstract orders" here is to be understood as orders that are completely independent from the nature of the HW processing devices they will impact. The only concern of L1_CM is to know where are the L2_CMU > CrownCom'06 – First International Conference on Cognitive Radio Oriented Wireless Networks and Communications< (on which HW devices) and how to reach them (through which method and communication media). B. L2_CMU The generic functions selected in level 1 through abstract orders are specialized at level 2 in accordance with standards specifications. The set of attributes of each function is handled by the L2_CMU to create each functional context of the entire processing chain. For example, a generic function "bit to symbol mapping" is set at level 2 to fit a selected standard. In the case of 802.11g standard this setting could be BPSK, QPSK, 16-QAM, 64-QAM constellation. At this level, in accordance with performance requirements of the function, the execution target is chosen. But the L2_CMU does not keep in charge the effective instantiation of the context onto the hardware. Here, functions are handled through a few attributes to completely characterize them. C. L3_CMU The processing datapath architecture supporting the PBUs depends on the reconfigurable computing resources of the hardware architecture. The complete processing path could be formed by different types of reconfigurable resources such as digital signal processors, FPGA, and inside FPGA: configurable accelerators, array of DSP blocks or a fine grain reconfigurable datapath. The main task of the L3_CMU in the configuration path is to find available processing resources and configure them to enable the execution of the functional context created at the superior level.
Clock management of the resources is also included as an important feature of the configuration manager to reduce power consumption and to provide an efficient management of the multi-rate applications. IV. COGNITIVE RADIO ARCHITECTURE The Cognitive Radio architecture is an extension of the configuration management architecture towards a Cognitive Radio global management. A. Distributed Hierarchical CR architecture The distributed hierarchical management of part III enables to reconfigure the system with adequate features in answer to the constraints imposed by the scenarios of use foreseen for such systems [4]. But it supposes that the reconfiguration orders only come from outside the system. There is a need to include smartness in the system itself. This is all about CR. As stated in II.B, this smartness has to be distributed inside the CR system, as well as the configuration management. Consequently, an identical approach has been used for the CR architecture. Fig. 3 shows the direct correspondence set between the reconfiguration management and the cognitive management: • L1_CM → L1_CR (level 1 – Cognitive Radio) • L2_CMU → L2_CRU (level 2 – CR Units) • L3_CMU → L3_CRU (level 3 – CR Units) The CR architecture should be capable of taking decisions at any level of the system, under a global supervision. It also should be aware of the reconfiguration capabilities of all its different elements at both HW and SW levels. The advantage of this hierarchical structure is to permit to take into consideration many parameters of different kinds at any level of the system on the one hand, and to impact the system in terms of reconfiguration either locally or at a large scale on the other hand. Level 1 Cognitive Manager L1_CR Config. Manager L1_CM Standard Set Level 2 L2_CRU L2_CMU Function Set … L2_CRU L2_CMU Function Set … Level 3 L3_CRU Cognitive Management L3_CMU Block Set … L3_CRU L3_CMU Block Set … Configuration Management Fig.
3 – Hierarchical distributed Cognitive Radio management functional architecture This hierarchical distributed architecture enables a wide range of possible scenarios, from global Quality-of-Service (QoS) considerations at the system level, to very local changes for bug-fixing or optimization purposes for instance. Let us consider the example of a large scale scenario: battery life extension. The distributed smartness reports to the L1_CR, through each L3_CRU, the status of power consumption of each of the PBUs of the system. Combining this information with others (concerning the QoS and the battery life duration) obtained through other elements of the CR architecture, the L1_CR takes appropriate decisions in terms of reconfiguration, in order to increase battery life duration for instance, while maintaining a targeted QoS. L1_CR gives this order to L1_CM that makes the necessary repercussion through the configuration management architecture of part III. Smartness may vary from very simple reactions such as a threshold detection and associated response (referenced in a pre-defined table) up to very complex scenarios. The complexity depends on the number of metrics to take into consideration and to fuse, as well as the decision engine implemented. The following paragraphs detail the behavior of the elements of the cognitive architecture that make these scenarios feasible. We opt in the following for a bottom-up description. > CrownCom'06 – First International Conference on Cognitive Radio Oriented Wireless Networks and Communications< B. L3_CRU L3_CRU role is firstly to capture sensing information and translate it into metrics. Metrics can either be computed by the L3_CRU itself or directly be provided by the PBU it is associated to. In this last context, the L3_CRU only receives the result from the PBU. Otherwise the L3_CRU receives the necessary information from the PBU (if not data themselves) which permits to obtain the metrics.
L3_CRU operation may vary from a very simple data capture to a very complex signal processing procedure. The implementation complexity of the L3_CRU is consequently very variable. Thanks to the distribution of the smartness through the entire CR system, the L3_CRU may react in two different ways, which permits to deal with any situation the CR system has to face, as illustrated in Fig. 4. Level 2 reconfiguration orders metrics, notifications Level 3 L3_CRU metrics or sensing information reconfiguration orders (parameters) L3_CMU reconfiguration orders processing block unit (PBU) Fig. 4 – L3_CRU and L3_CMU message exchanges to manage a PBU On the one hand, the sensing information captured by a L3_CRU is known to have impacts at the only level of the PBU it manages. It is no use to propagate the information through the entire system. L3_CRU acts locally, takes reconfiguration decision, interprets the metrics and converts them into parameters it sends to L3_CMU, that makes the reconfiguration as in III.C. L3_CRU notifies to higher layers of the CR structure the changes that were done. Additionally, it is the role of the L3_CRU to verify the PBU behavior and inform higher CR layers of its status (error, stand-by, run, etc.). If the impact is larger, on the other hand, L3_CRU sends the metrics to L2_CRU. If all the PBUs concerned by the reconfiguration are under the same L2_CRU responsibility, L2_CRU plays the roles of L3_CRU in this specific context (takes reconfiguration decision and notifies to L1_CR). Paragraph IV.C explains in detail how L2_CRU acts. Note that the impact of a sensing information is a priori known. It is clear that depending on its nature, each stimulus has one or several well-identified causes and consequences (maybe combined with other stimuli). So that the reconfiguration procedures that could be provoked, as well as their range in the system are pre-defined.
Another important point to stress is that the information sent from the lower CR layers to the higher layers have to be saved in databases that contain the state of the system. Most of them will be located at the L1_CR level, but it can be considered to have databases accessible at L2_CRU level. As previously mentioned, the L3_CRU must be able to take decisions for a local impact of the sensing information. As stated for the metrics computation, the decision procedure may range from very complex to very simple processing. The following examples may be given, but the list is not exhaustive, in increment of complexity: - a simple threshold, - a state machine, - neural networks, - optimization heuristics (genetic, etc.). This range of decision procedures is also met at the higher levels L2_CRU and L1_CR. The higher the level, the more complex algorithms may be expected. The reason is that more metrics are to be taken into account and combined, but also the processing devices are more suited to that kind of processing. Note that training techniques may also be implemented. C. L2_CRU L2_CRU gathers the metrics coming from the L3_CRU it supervises, as described in IV.B. Two situations may occur. Either it transmits the metrics to the L1_CR if these metrics may contribute to a decision of higher level than those it can take by itself. It may by the way update L2_databases if necessary. And/or it converts the metrics coming from the bottom in abstract reconfiguration orders (abstract in the sense that they are not HW-dependent) for a delivery to the L2_CMU. L2_CRU notifies the L1_CR of the reconfiguration it ordered. Then L2_CRU supervises the correctness of the L2_CMU reconfiguration orders thanks to all the L3_CRU reporting the behavior of the PBU that depend on this L2_CRU/L2_CMU couple. D. L1_CR L1_CR is the supervisor of the cognitive sub-part of the system. It is the intelligence that can take initiatives depending on the environment stimuli.
These stimuli are translated into metrics. They can come from the bottom hierarchy of the CR system (L2_CRU and L3_CRU), namely from the processing part of all layers of the OSI model. The PBU composing the processing datapath may indeed process physical layer, as well as protocol, or even application layer operations. But this is also the L1_CR role to receive the metrics from the network side. There is a heavy debate on the repartition of the smartness between the network and the access point or terminal CR system. Our proposal can cope with all possible repartition of the smartness. Either L1_CR processes itself raw data and extracts the corresponding reconfiguration information to be delivered to the L1_CM, or it directly transmits to the L1_CM the pre-processed information it receives. Nevertheless, user-related information are under the responsibility of L1_CR, as well as other stimuli, that are not directly extracted from the signal processing of the radio protocol stack: battery level, time-of-the-day, air temperature, > CrownCom'06 – First International Conference on Cognitive Radio Oriented Wireless Networks and Communications< etc. The actions taken by the L1_CR can be of very different range and complexity, depending on the nature of the system (terminal, PDA or even bigger), and its degree of smartness. L1_CR is in charge of the multi-standard reconfiguration, and of any reconfiguration whose range concerns several L2_CMU. It also supervises the correctness of the reconfiguration it initiated. Such large scale reconfigurations (at the standard level) must respect extremely hard constraints in terms of synchronization between the PBU, time of reconfiguration, etc. It still seems an impossible challenge to really perform a real inter-standard soft-handover. But if it will be possible one day, a complete scenario must be played in the L1_CR, going through steps with the two standards running at the same time during a small portion of time at least. 
So that the two standards could catch the application at the very same time in order to maintain a continuity of the service. The decision at L1_CR may be triggered by various kinds of events, depending on the scenarios. All can not be anticipated. At least arrays of different operating mode (very good, good, energy saving mode, etc.) must be considered, so that the system can reconfigure itself according to its environment. The trade-off is to be done between the two following extremes. Either be very directive with all the supported scenarios in pre-defined databases. It is possible to consider they may be updated. Main advantage is that each scenario may have been pre-tested. Or a certain autonomy is let to the system. The degree of supervision of the user or the manufacturer/network/operator has to be defined. E. Li_CR(U) and Li_CM(U) interactions Fig. 5 aims generalizing the interactions described separately for the configuration management and the cognitive management in the previous sections. The Li_CM(U) may be activated either from the superior level(s) of the configuration management, or from the Li_CR(U) of the same level. Li_CM(U) manages reconfiguration and propagates reconfiguration orders down to the lower level, in order to do the reconfiguration of the PBUs. Level i+1 metrics, notifications Level i Li_CR(U) Level i-1 metrics, notifications reconfiguration orders reconfiguration orders either to the superior level Li+1_CR(U) or to the Li_CM(U) depending on pre-defined rules, that maybe static or computed on-the-fly. The Li_CR(U) also send notifications (of behavior accuracy for instance) to the higher level of CR management. So, we observe a bottom-up metrics and notifications flow in the cognitive management and a top-down configuration flow in the configuration management. V. 
CONCLUSION The functional Cognitive Radio architecture proposed in this paper relies on the configuration management architecture we already defined for SDR systems, and which is currently under implementation. This prototyping at the configuration management level strongly orientates our investigations towards cognitive management, which is a guarantee of the viability of our solution for the implementation of the CR concepts. The originality of this proposal is due to the effective separation of the configuration and the cognitive management of a SDR system, while keeping a strong relationship between them. Respective roles and duties are well identified, as well as their interactions and interfaces with the other entities. Future papers will more precisely detail the intimacy of the architecture, and illustrate its mechanisms through examples of cognitive radio scenarios. Many points still remain to be investigated or at least better defined. Let us just stress the repartition and the content of the configuration and knowledge databases. Configuration databases are the repository of the necessary pieces of binary code (for processors) or bitstream (for FPGA) for the instantiation of the standards supported by the system. Knowledge databases contain the metrics from which the cognitive management takes its decision. The management of the decisions itself is a wide area we plan to address in the future. REFERENCES [1] [2] [3] [4] [5] Li_CM(U) reconfiguration orders Fig. 5 – Generalized view of the interactions between the CR and CM management. The Li_CR(U) receives or computes metrics extracted from stimuli of different kinds or from the inferior level of cognitive management, and decides to send the information [6] [7] [8] Joe Mitola, "The software Radio Architecture", IEEE Communications Magazine, May 95, pp. 26-38. Walter Tuttlebee, "Evolution of radio systems into the 21st century", Proc. IEE Int. Conf. on 'Radio receivers and associated systems', 1995.
Apostolos Kountouris, Christophe Moy, "Reconfiguration in Software Radio Systems", 2nd Karlsruhe Workshop on Software Radios, Mar. 2002. Jean-Philippe Delahaye, Jacques Palicot and Pierre Leray, "A Hierarchical Modeling Approach in Software Defined Radio System Design", IEEE SIPS, Athens, Greece, 2-4 Nov. 2005 Joe Mitola, "Cognitive Radio – An Integrated Agent Architecture for Software Defined Radio", thesis dissertation, 8 May 2000. Christophe Moy, Alexis Bisiaux, Stéphane Paquelet, "An Ultra-Wide Band Umbilical Cord for Cognitive Radio Systems", PIMRC'05, Berlin Germany, September 2005. Markus Dillinger, Nancy Alonistioti, Nikolas Olaziregi, Thomas Wiebke, "Network Functions Supporting Reconfiguration in B3G Environments", Frequenz 58, May 2004, ISSN 0016-1136, pp. 121-125. Jean-Philippe Delahaye, Christophe Moy, Pierre Leray, Jacques Palicot, "Managing Dynamic Partial Reconfiguration on Heterogeneous SDR Platforms", SDR Forum Technical Conference, Anaheim, USA, 15-17 Nov. 2005 +0 # " 0 5 $ 0 (1#/#"("# / +< +" 1 ! ( 3# #) 3 +0 +0 A simulator for the design of the management architecture of cognitive radio equipments Godard Loïg, Moy Christophe, Palicot Jacques SUPELEC / IETR Avenue de la Boulaie CS 47601 F-35576 Cesson-Sévigné Cedex France Email : {loig.godard,christophe.moy,jacques.palicot}@supelec.fr Abstract— This paper presents a simulator for the design of the management architecture of cognitive radio (CR) equipments. It explains the requirements for CR systems in terms of reconfiguration, smartness and sensing capabilities. A key point in CR systems is the reusability and the reconfiguration possibilities. Thus classical approaches, which refer to create specialized material to assume communication process, cannot be used. Indeed, CR systems have to be deployed on a heterogeneous execution platform to be flexible and adaptable.
We propose a simulator based on a Hierarchical and Distributed Cognitive Radio Architecture Management (HDCRAM) meta-model to play CR scenarios. The simulator particularly helps defining precisely the needs in terms of CR management architecture. Index Terms—Cognitive Radio, multi standard, heterogeneous processing, cognitive management I. INTRODUCTION The growing number of communication standards inside radio equipments leads to a question : how to design multiple radio communication chains without taking too much silicon space ? An answer is the use of Software Defined Radio (SDR) techniques that give the opportunity to reconfigure a part or the totality of resources of the equipment [1]. Beyond this approach, Cognitive Radio (CR) aims at giving the opportunity to a communication system to be aware of its environment [2]. This smartness opens new possibilities for radio equipments : maintaining a good quality of service (QoS) with bad communication conditions, being able to handle new standards or new functions on demand and so on. It derives that future radio equipments have to be reconfigurable and aware of their environment. This possibility means to be able to sense their environment [3], interpret metrics, take decisions of reconfiguration in order to proceed some optimization, reconfigure the system and finally check the reconfiguration result. This is an infinite loop of reconfiguration in the sense that there are always new metrics coming from internal or external sensors that stimulate new reconfiguration operations. Our research work is focused on how a radio equipment can manage the reconfiguration after taking decisions in a heterogeneous computing context. We propose that for such systems composed of GPP, DSP, FPGA and ASIC, a hierarchical and distributed management for reconfiguration [5] and cognition [6] has to be deployed.
This approach gives the opportunity to tackle fine granularity reconfigurations, as well as larger scale of reconfigurations according to the needs. This approach also underlines the fact that decisions of reconfiguration can be taken locally or globally in order to decrease the time response of the system in terms of optimization needs. In a first part we introduce the Hierarchical and Distributed Cognitive Radio Architecture Management (HDCRAM) we propose for CR equipments (such as terminals and base stations). Then we develop an example of a CR scenario where metrics are evolving, provoking some reconfigurations in the equipment and explain how our architecture can cope with it. The scenario is played by a simulator derived from a metamodel of the HDCRAM and which addresses : . Sensing means and metrics generation, . Metrics analysis, . Decision taking, . Reconfiguration management. Simulation runs make it possible to precisely define and derive the actions and communication means required within the global CR management architecture of a CR equipment. Finally we conclude on the perspectives of our work on this simulator for cognitive radio equipment. II. THE HDCRAM A. A CR Architecture Based On A Reconfiguration Architecture For SDR We derived from previous studies that a hierarchical and distributed SW architecture is required for the reconfiguration management of SDR equipments [4] [6] [7]. This helps supporting reconfiguration of a wide range of granularity, on heterogeneous HW computing resources (GPP, DSP, FPGA) with hard real-time constraints. It is composed of 3 levels of hierarchy above the flexible signal processing elements operating the radio modulation/demodulation. Then the SW reconfiguration management architecture for CR equipments has been extended. This consists in associating cognitive units to the reconfiguration units at each level of the management hierarchy. We called it HDCRAM, which is illustrated in Figure 1.
view of the architecture that has to be deployed in each cognitive radio system according to its functionality. At Fig. 1. Hierarchical and Distributed Cognitive Radio Architecture Management (HDCRAM) B. A Hierarchy Description The architecture is based on 3 levels, each of them having both a reconfiguration management unit Li CM(U) and a cognitive management unit Li CR(U). There is only one L1 CR and one L1 CM. Several couples made of one L2 CRU and one L2 CMU are respectively depending on L1 CR and L1 CM. Each couple made of one L2 CRU and one L2 CMU is respectively managing several couples made of one L3 CRU and one L3 CMU. This architecture uses a specific control flow as shown in Figure 2. The configuration management part is characterised by top-down communication exchanges whereas at the opposite, the cognitive management uses bottom-up communications. Note also a control flow from a CR(U) to its associated CM(U) for reconfiguration orders sending. Fig. 2. Control flow in HDCRAM Fig. 3. the top level is the general management of the equipment with the 2 entities L1 CM and L1 CR. The L1 CR manages the cognitive radio equipment behavior which includes the sensing and decision side of the management architecture, which can initiate reconfigurations of the radio equipment processing units, taken in charge by the L1 CM. At L1 level, the HW considerations of the signal processing elements implementations are abstracted. At the bottom, each signal processing element of the radio chain is associated with its own L3 CMU and L3 CRU for very fine reconfiguration and cognitive purposes. The design of each group made of a signal processing element, a L3 CRU and a L3 CMU is very dependent of the HW implementation imposed by the electronic device supporting the processing operation. Intermediate level L2 is both an abstraction level between top level L1 and bottom level L3, as well as a local manager for the L3 units it is responsible of. III.
AN EXECUTABLE META-MODEL C. A Meta-model For A CR Management Architecture In order to open this achievement to the community, the HDCRAM of Figure 1 has been modeled in UML (Unified Modeling Language). The HDCRAM meta-model of Figure 3 underlines the dependencies, the vertical and horizontal connections as well as the cardinality of each class in the HDCRAM system. This meta-model represents a factorized Meta-model of the HDCRAM A. Kermeta The main advantage of the definition of HDCRAM in an object-oriented meta-language is to offer modularity and reusability. Classical object-oriented meta-languages such as MOF (Meta-Object Facility) [8] or EMOF (Essential MetaObject Facilities) offer a high level of abstraction of an application as well as facilities to specify Domain-Specific Language (DSL). However, these meta-languages only support structural specifications and do not support operational semantics. Kermeta [9] language was developed at the INRIA to support behavior definition and provide an action language to specify the body of operations in meta-models. With the help of Kermeta language we can describe our architecture with both structural and behavioral definition. This offers the opportunity to refine the meta-model with scenarios that ensure the system conforms to specifications. B. Developing Environment As Kermeta is fully integrated to EMF [10] (Eclipse Modeling Framework), it offers outline view to navigate through the whole model and meta-model, code autocompletion and various import/export transformations (ecore2kermeta, kermeta2ecore, kermeta2xmi - xmi version of your kermeta meta-model -, xmi2kermeta, xmi2ecore). These import/export transformation facilities offer to the designer the possibility to describe an application in a Topcased graphical view (class diagram as shown in Figure 3) and export it in an Ecore format. This format is the textual version of the graphical view as illustrated in Figure 4. Fig. 4.
Meta-model Ecore view Then designer can specify his own DSL in Kermeta code to define the operational semantics of his meta-model and execute simulations (thanks to ecore2kermeta transformation). As the interoperability of Kermeta with EMF/Eclipse tools is well performed, the designer can also deploy a model of his architecture in a recursive editor, set the property of each class in a pre-defined state and save it (in a xmi format) or launch the application. A simple load action configures the system in a previously saved state as shown in Figure 5. IV. A HDCRAM SIMULATOR Kermeta executable language is not used here to perform heavy computational and complex cognitive algorithms but to precisely detail the intimacy of the architecture, and illustrate its mechanisms through examples of cognitive radio scenarios. A solution to clearly specify the reconfiguration and cognition capabilities of the HDCRAM is to express them in a language that matches CR domain constraints. Thus, a simulator has been developed, based on an executable meta-language that can handle CR domain constraints (such as awareness, spectrum access, decision making, reconfiguration achievement). Fig. 5. Operation load in Kermeta code This simulator offers the opportunity to the designer, in a first step, to specify the CR system in a predefined state with parameters such as the standard in use, QoS, hardware repartition of operators on the execution platform, etc. In a second step, the designer can modify operators metrics via a shell command and validate interpretation and decision taken according to those changes. Execution steps clearly underline operations that have to be supported by both CR and CM part, at each level of abstraction of the architecture. A key point in CR levels is how metrics are captured, analysed and how decisions of reconfiguration are taken. Those operations are underlined in a cognitive cycle applied at each level of the cognitive management.
This cognitive cycle, derived from J. Mitola's cognition cycle [11], benefits from the proposed hierarchical and distributed architecture. Indeed, distribution gives the opportunity to tackle multi-granularity reconfiguration, decreasing the architecture response time and offering the possibility to reconfigure only the necessary part of the architecture. Note that the decision procedure may range from very complex to very simple processing. The following examples may be given, in increasing order of complexity, but the list is not exhaustive: • Simple threshold, • State machine, • Neural networks, • Optimization heuristics (genetic, etc.). The higher the level is, the more complex the algorithms that may be expected. The reason is that more metrics have to be taken into account and combined. We present in Figure 6 the operations used by each CR(U) deployed in a system. Operations are defined as follows: • Evaluate metric: evaluate the impact of the metric on the level. • Analyze metric: identify the metric. The result of those operations gives a performance appreciation from which the CR(U) can define actions: • Define action: in a first step, the designer sets a list of actions. But in the future, this part can benefit from self-learning capabilities. Fig. 6. Cognitive cycle • Prioritize action: according to the impact (in reconfiguration terms) of metrics on the system, a priority is given to the action. • Define Metric Strategy Action: this operation can be simply sending a metric (in an appropriate format) to the upper layer, or sending a reconfiguration order to the corresponding CM(U) followed by a notification of the new configuration to the upper layer. Once a CR(U) has taken a reconfiguration decision, it sends the new parameters to its associated CM(U), which is responsible for applying them. Reconfiguration management must respect hard constraints in signal processing, reconfiguration time and free resources management. 
CM(U) levels also have to deal with priority and must provide a task scheduler in order to achieve reconfiguration. This is exactly the kind of study the simulator will help to investigate in future work. Section V will give an overview of a scenario played with our simulator. V. A SCENARIO For the sake of clarity, the chosen scenario presents a local decision of reconfiguration. Local is to be taken here in the sense that there is no decision coming from the network. The equipment analyses its own performance and decides to reconfigure itself. The scenario underlines metrics capture, interpretation and decision of reconfiguration with effective reconfiguration. Figure 7 illustrates the deployed system in a tree view. We can recognize one L1 CM/L1 CR couple, only one L2 CMU/L2 CRU couple in this trivial example, as well as two couples of L3 CMU/L3 CRU supervising two operators: an ISI-capture (Inter Symbol Interference) operator and an equalizer operator. The role of an equalizer is to correct the disturbances due to ISI. An ISI metric generated by the ISI-capture operator is acquired by its L3 CRU, which transmits it, after interpretation, to its L2 CRU, which orders (or not) a reconfiguration of the equalizer operator (activate or bypass it). In this scenario, there is no need for L3 CRUs to order a reconfiguration of their own operator. The equalizer for instance cannot provoke its Fig. 7. Recursive editor and class properties view own reconfiguration from a metric it is generating itself. It is the duty of the L3 CMU controlling the equalizer operator to check operator behavior via acknowledgement signals. Reconfiguration orders are sent when the metric interpreted by the L3 CRU controlling the ISI-capture operator varies from good to bad and vice versa. This simple scenario has been chosen to facilitate understanding but more complex scenarios can be played. From this, L2 CRU interprets this metric in terms of radio signal quality. 
If the incoming metric value is good then the signal “performance” criterion (according to the meta-model attribute) is good and consequently L2 CRU’s performance is good. If the metric value is bad then the signal is disturbed and L2 CRU performance switches to the “disturb” state. According to its performance, L2 CRU decides to send the reconfiguration order (corresponding to this situation) to the equalizer operator via its L2 CMU. In the current status of our work, to each situation corresponds a pre-defined action. This choice has been made in order to keep system integrity; nevertheless this may change in the future with the use of more sophisticated algorithms, having self-learning capabilities for instance. Reconfiguration may range from deleting the operator if there is a resource shortage or a low battery alert, to bypassing it if the ISI metric is good, or activating it if the signal is degraded. If the equalizer is not deleted, the L3 CMU controlling the equalizer achieves the reconfiguration by selecting the appropriate file in its data-bases, and implements it on the platform if this is a new implementation or changes the data path to bypass it, if not. When a scenario is played, the designer enters new parameters coming from operators in a shell (Figure 8) and validates the process with the returned information. Fig. 8. Command shell As previously indicated, the designer can also validate a scenario with the recursive editor at the bottom of Figure 9, which gives an overview of the current state of the selected object. simulator is based on a DSL that underlines both HW and SW constraints for the Cognitive Radio domain. The aim of the simulator is to allow designers to include their own operators, thus increasing the system’s operators data-base, defining (not exhaustive): . Standard(s) and function(s) dependencies, . Metric nature and impact, . Possible HW target, . Reconfiguration management. 
Once operators and management rules are defined, the designer can use them and validate the equipment behavior. Considering the current development of the simulator, distribution of both CR(U) and CM(U) managers through the design is left to the discretion of the designer. Nevertheless, it could be interesting to develop mathematical tools that deploy the HDCRAM through the design optimally. Although this paper also presents how reconfiguration decisions are taken and how they are carried out in HDCRAM, this reveals the following issue: how indeed to guarantee a stable and effective global reconfiguration of the equipment if each CR(U) can decide on a reconfiguration on its own? Isn’t the best the enemy of the good? These considerations have to be taken into account from both the network and the equipment points of view and will be addressed in forthcoming studies. Another open issue is to consider real-time constraints at both decision and reconfiguration levels. Concerning the reconfiguration side on the one hand, we have studied and experimented with many dynamic reconfiguration contexts, which makes us aware of and confident about the limitations to be expected. On the other hand, we plan to address the decision side of the issue in our future research programs. REFERENCES Fig. 9. L2 CRU state after a run As L1 CR is at the top of the architecture, it has to be informed of the performance of each function in use. Thus, it interprets metrics from functions and obtains a global performance of the standard operating in the equipment. From this, it can decide (or not) a reconfiguration action that can range from reparameterizing one to all functions or even deciding a standard change. As we are not considering a complete transmission chain in this particular scenario, L1 CR only receives interpreted metrics from L2 CRU and stores them. VI. CONCLUSION This paper has presented a simulator for the design of the management architecture of cognitive radio equipment. 
This [1] Joe Mitola, “The software Radio Architecture”, IEEE Communications Magazine, May 1995, pp. 26-38. [2] Joe Mitola, “Cognitive Radio: An Integrated Agent Architecture for Software Defined Radio”, thesis dissertation, 8 May 2000. [3] Christophe Moy, Alexis Bisiaux, Stéphane Paquelet, “An Ultra-Wide Band Umbilical Cord for Cognitive Radio Systems”, PIMRC’05, Berlin, Germany, September 2005. [4] Jean-Philippe Delahaye, Jacques Palicot and Pierre Leray, “A Hierarchical Modeling Approach in Software Defined Radio System Design”, IEEE SIPS, Athens, Greece, 2-4 Nov. 2005. [5] Loïg Godard, Christophe Moy, Jacques Palicot, “From a Configuration Management to a Cognitive Radio Management of SDR Systems”, in Proc. of CrownCom’06, Mykonos, Greece, 8-10 June 2006. [6] Jean-Philippe Delahaye, Christophe Moy, Pierre Leray, Jacques Palicot, “Managing Dynamic Partial Reconfiguration on Heterogeneous SDR Platforms”, SDR Forum Technical Conference, Anaheim, USA, 15-17 Nov. 2005. [7] Apostolos Kountouris, Christophe Moy, “Reconfiguration in Software Radio Systems”, 2nd Karlsruhe Workshop on Software Radios, Mar. 2002. [8] OMG. MOF 2.0 Core Final Adopted Specification, Object Management Group, http://www.omg.org/cgi-bin/doc?ptc/03-10-04, 2004. [9] Zaid Altahat, Tzilla Elrad, Didier Vojtisek, “Using Aspect Oriented Modeling to localize implementation of executable models”, Models and Aspects workshop, at ECOOP 2007. [10] Budinsky, F., Steinberg, D., Merks, E., Ellersick, R. and Grose, T. Eclipse Modeling Framework. Addison Wesley Professional, 2003. [11] J. Mitola, “Cognitive Radio: Making Software Radios More Personal”, IEEE Personal Communications, August 1999. '- : $ "#0 - 504 - " , '< 0 '#0. .$ ,$ # #$ 03 1$ $ ! 1 " '- '- " '- $ "#0 - 504 - -( & ;!% % & , G @ >( 6 H 9 6 9 K > 9 6 .$ ,$ # #$ 03 6 < > 1 " '- '- " % %. 6 * 6 ,$ '0# 9 '> 6 & ' K 9 >( 1$ $ ! 
6 6 9 9 > L + > ' 9 0 6 K , 2 G 96 K 2 26 - '- 5 3$ ' 9 & 6 A " :& c ( ;d 9 H '9 A >& > 9 + BG, P H< 6 A S > 3 2 > 6 ,92 + + + 1 9 < > ( & 9 >& 9 < > - '- 5 3$ < & > A < < 9 c ( @d 7 ' A 9 > ( 2 2 9 Q ,9 2 > 2 + < 6 & & 96 %' '> ' ' ) & 2 96 c = d < 2Z/ Z. , & 2 c ==d < 9 9 6 % ( 6 ) > 2 2 > < 69 $ 9 9A ( 9 ( + 9 1 '# 0/15 & 9 & 2 > ' < ( 1 . 6 > > 6 " , '< 0 '#0. % ' + 1") 9 B 504 6 Z/ % % ) / 1$ $ ! 1 " 2& : # Couche cachée de (I) RBF Couche de (J) sorties w1 f (γ , C1 ) x(t + mTe ) γ(1) DS P ŷ1 100 kHz f (γ , C i ) γ ( 0) 200 kHz f(γ,CI −1) γ(LTFD / 2) f (γ,CI ) Band adaptation ŷ J 300 kHz 6M & BPc 8 MHz wI DS/FH Detection DS / FH access mode with Time frequency analysis ) .! '- + /-) 1 >, . .$ ,$ # #$ 03 Logique de sortie $ "#0 - Prétraitement '- Detection Mono/ Multi carrier with Cyclostationnarity techniques on the Guard Interval IG ! Q & Standard " & & " Channel bandwidth & F U S I O N R Channel Filter DCS 1800/DCS 1900 200 kHz Gaussien 0.3 PDC 25 kHz Nyquist 0,5 CT2 100 kHz Gaussien 0.5 GSM 200 kHz Gaussien 0.3 EDGE 200 kHz Linearized Gauss GPRS 200 kHz Gaussien 0.3 PHS 300 kHz Nyquist 0.5 1 MHz Gaussien 0.5 Bluetooth (IEEE802.15.1) IS95 1.25 MHz FIR Globalstar 1.25 MHz RIF 48 coeff DAB 1.712 MHz Window DECT 1.728 MHz Gaussien 0.5 UMTS 5 MHz Nyquist 0.2 DVB-T 7–8 MHz DVB-S 32-36 MHz RLAN 10 MHz Nyquist 20 MHz Gaussien 0,5 32-36 MHz Nyquist 0.2 Hiperlan I LMDS Hiperlan II 50 MHz Window REC (Nyquist) 0,3 Window IEEE802.11g 20 MHz Window IEEE802.11b 1 MHz Gaussien 0,5 IEEE802.15.4 2 MHz Gaussien 0,5 &;% ( M & " & 7! % "& , '< 0 '#0. Standard ! ! + + # " '- " '- 504 $ "#0 - ' - .$ ,$ # #$ 03 1 " 2 + > % . . c =;d 1= = c =@d 9 Z B 6 2 "7 '- '- " A 1= ) 2 H " ) / : Z = 1B c =:d 1 K Z ? " 2 9 > G+ )), / B 3 Z'05`` , 9 9 92 A 3 Z'05`` A 6 > , ( 2 * 1$ $ ! 9 + + A ,$ '0# 6 5 > 6 9 < < !-0VV$ c =Bd /# 81 ! < 2 9> 19 >& 9 6 2 ( > 6 ,9 c ( d c ( ;d 2 K & >& 92 " , > , '< 0 '#0. A 2 ? '- $ "#0 - 504 - " , '< 0 '#0. .$ ,$ # #$ 03 1$ $ ! 
1 " '- '- " Dossier LA PERCEPTION DES CHAMPS ÉLECTROMAGNÉTIQUES AMBIANTS : UN 6ÈME SENS POUR L'HOMME ? La bulle sensorielle radio intelligente I Jacques PALICOT, Rachid HACHEMANI, Christophe MOY Supélec / IETR - Campus de Rennes 1. Introduction Commençons cet article par une analogie entre les systèmes de radiocommunications transportant de l’information et le transport terrestre de biens. Le transport ferroviaire est un lien physique entre 2 gares. Le train sur lequel se trouve le bien à transporter, doit suivre les rails et ne peut pas choisir d’autres « routes » et d’autres horaires. Notre analogie consiste alors à dire qu’une communication radio classique sur un lien standardisé du type GSM n’offre aucune autre possibilité que suivre les rails (une fréquence et une modulation données) pour que l’information atteigne le destinataire. Lorsque ce même bien est transporté via le réseau routier, il existe toute une infrastructure qui permet de rejoindre ces 2 mêmes points avec une liberté d’horaire. Poursuivons notre analogie, la radio logicielle sera pour nous l’infrastructure équivalente au réseau routier qui offrira un choix possible (sur la fréquence, la modulation…) pour transmettre l’information de l’émetteur au destinataire. Sur ce réseau, le conducteur 1 Mots clés Cognitive Radio, Capteurs intelligents, Radio logicielle, Terminal universel, Convergence aura toute liberté pour choisir son itinéraire en fonction de différents critères (temps de parcours, distance, coûts des péages, trafic prévu, heure de départ, etc...). De la même manière la radio intelligente permettra au terminal (le conducteur) de se déplacer dans l’infrastructure radio (grâce à la technologie radio logicielle) en ayant beaucoup plus de choix à sa disposition, grâce à l’information donnée par la « bulle sensorielle ». 
La « bulle sensorielle » est chargée de fournir au système radio intelligent des informations sur l’environnement du système afin que celui-ci puisse effectuer les meilleurs choix en terme de modulation, fréquence porteuse, codage canal,… à un instant donné, dans un lieu donné et pour un service donné. Dans ce travail sur la définition du concept de « bulle sensorielle » nous nous plaçons dans un contexte de « handover 1 généralisé », comme illustré par la figure 1, c’est-à-dire que les choix sont à faire parmi un ensemble pré-déterminé, constitué des standards de communications sans fil présents à l’endroit et au moment de la communication. Ce concept, initialement proposé dans le « Handover » correspond à une technique offrant une transition entre 2 points d’accès soit identiques (« Handover » horizontal) ou différents (« Handover » vertical). Cette transition peut être instantanée ou douce. L ’ E S S E N T I E L La radio intelligente est une évolution de la radio logicielle. Plus précisément cette dernière sera la technologie supportant ce nouveau concept de radio intelligente. La radio intelligente est un système de radiocommunications qui adapte son fonctionnement à l’environnement (au sens le plus large du terme). Nous présentons dans cet article, le concept de « bulle sensorielle ». Suivant ce concept, le terminal intelligent possède une bulle multidimensionnelle, dont le diamètre dépend, dans chaque dimension, du capteur considéré. Elle donne au terminal toute l’information nécessaire à la prise de décision adéquate quant à son fonctionnement optimal (en terme d’utilisation des ressources disponibles) et sécurisé (en terme de QoS). Pour expliquer notre propos, nous ferons des analogies avec la « bulle physiologique et psychologique » d’un individu ainsi qu’avec cette même bulle lorsque cet individu devient un conducteur dans un contexte automobile. 
Par ces analogies, notre but est de déduire des règles comportementales quant à l’utilisation des systèmes de radiocommunications. Il semble clair que la radio intelligente ne sera pas opérationnelle et disponible pour offrir les futurs services sur les réseaux des prochaines générations avant de nombreuses années, tant les difficultés techniques sont encore importantes. S Y N O P S I S Cognitive radio is an evolution of software radio. Software radio is indeed an enabling technology for the new concept of cognitive radio. A cognitive radio communication system matches its behaviour to the environment (in its largest sense). This article introduces the concept of the "Sensorial Bubble". A cognitive radio terminal has a multi-dimensional bubble, the diameter of which depends, in each of its dimensions, on the corresponding sensor. The bubble provides the terminal with all the necessary information to take adequate decisions concerning its optimal (in terms of available resources use) and secure (in terms of quality of service) operation. Analogies with the human "physiological and psychological bubble", as well as with the "driver bubble", are provided to illustrate the concept. These analogies aim, in the long term, at deducing behavioural rules for future cognitive radio terminals. It is obvious that cognitive radio will not be a reality in the very next commercial wireless radio networks; it will take many years to solve the technical challenges that still remain. REE N° 9 Octobre 2007 1 Dossier LA PERCEPTION DES CHAMPS ÉLECTROMAGNÉTIQUES AMBIANTS : UN 6ÈME SENS POUR L'HOMME ? Figure 2. La radio logicielle idéale. Figure 1. Le Handover généralisé. contexte de [8] et maintenant communément adopté dans la communauté des radiocommunications, sous-entend une double mobilité : une mobilité géographique classique nécessitant un « handover horizontal » pour les systèmes cellulaires et une mobilité entre réseaux, standards, services. 
Cette dernière mobilité est offerte par le « handover vertical ». Celui-ci doit être réalisé en évitant les coupures de communication pour l’usager. C’est avec l’avènement des technologies radio logicielle que ce type de « handover » est devenu véritablement envisageable car une reconfiguration du terminal est nécessaire. Le « handover horizontal » quant à lui existe déjà actuellement dans les réseaux mobiles. C’est lui qui permet de se déplacer de cellule en cellule, mais il n’implique pas de reconfiguration du terminal. 2. De la radio logicielle à la radio intelligente Le marché de la téléphonie mobile grand public tend à intégrer de plus en plus de standards (GSM, GPRS, EDGE, UMTS) et autres moyens de localisation (GPS). A cela commence à s’ajouter la réception des réseaux de diffusion (FM, DAB, DVB-H) et des réseaux locaux à haut débit (WiFi). Il n’est pas envisageable économiquement de passer à ce stade avec les techniques habituelles de duplication des composants électroniques pour chacun des standards évoqués dans chaque produit. La radio logicielle vise à lever ce verrou. De plus, cette évolution technologique favorisera également l’insertion de nouveaux moyens de communication dans d’autres domaines faisant de plus en plus appel aux systèmes communicants. En particulier, une automobile intègre déjà plusieurs moyens de communication et ce n’est qu’un début. A la radio analogique (FM, GO), s’ajoutent désormais la téléphonie mobile, le GPS, le système de péage à distance. En outre, de nombreux autres systèmes s’apprêtent à être intégrés dans tous les moyens de transports routiers, connus sous le nom ITS (Intelligent Transport System) qui géreront de nouveaux systèmes radio tels que les systèmes anticollision, les systèmes d’information au guidage… Là encore, la radio logicielle offrira des solutions à coût moindre et efficacité supplémentaire pour la mise en commun de parties majeures de ces systèmes. 2 REE N° 9 Octobre 2007 Figure 3. 
La radio logicielle restreinte : l’architecture pragmatique dans l’exemple d’un récepteur. 2.1. La Radio Logicielle idéale (RL) La figure 2 présente le schéma de référence de la RL [1]. Les conversions s’effectuent tout de suite après l’antenne et l’architecture est entièrement reprogrammable car le processeur de signaux est directement relié aux convertisseurs. Les fonctions CAN et CNA haute fréquence et large bande seront très compliquées à réaliser et constituent, encore aujourd’hui, une des difficultés majeures de la RL. Il est à noter également que les contraintes de performance de calcul imposées au processeur sont également extrêmes et que ceci est aussi un défi technologique qui reste à relever. La consommation de puissance de tels systèmes est un challenge à relever également dans le cas de systèmes embarqués. La radio logicielle implique également la présence d’un système de gestion de reconfiguration qui s’ajoute aux organes de traitement du signal. 2.2. Les architectures actuelles : La Radio Logicielle Restreinte (RLR) On peut restreindre l’approche de la figure 2 à des sous-bandes du spectre afin de limiter les contraintes sur le matériel. Actuellement, les architectures des récepteurs les plus couramment utilisées (récepteur superhétérodyne) ne permettent pas une interchangeabilité des fonctions radio. Comme cela a été précisé précédemment et abondamment décrit dans la littérature, les contraintes pour réaliser une radio logicielle idéale sont telles que les acteurs du domaine ont proposé une radio logicielle restreinte dont le principe est basé sur l’architecture pragmatique [10] [11] présentée sur la figure 3 dans l’exemple d’un récepteur. Le FEA (Front End Analogique) conserve l’ensemble des fonctions analogiques qui ne peuvent pas encore être réalisées dans le domaine numérique. Ce sont par exemple certains filtres, les amplificateurs.... Ensuite vient directement la fonction de conversion analogique numérique. 
Celle-ci est effectuée par le CAN (Convertisseur Analogique-Numérique). Enfin le traitement du signal du FEN (Front End I La bulle sensorielle radio intelligente I Figure 5. La radio intelligente en fonction d’un modèle simplifié en 3 couches. Figure 4. Un schéma simplifié du cycle intelligent. Numérique) est effectué par un ou des composants programmables qui peuvent être de type logiciel (processeur) ou matériel (reconfigurable : FPGA ; paramétrable : ASIC), mais qui la plupart du temps combineront les deux. L’essentiel est qu’ils offrent la plus grande flexibilité possible au système. Les fonctions effectuées par ce bloc correspondent à des fonctions auparavant effectuées en analogique, mais que la technologie actuelle permet de réaliser en numérique. Parmi ces fonctions nous trouvons : le changement de fréquence d’échantillonnage, la transposition en bande base, le filtrage du canal d’intérêt... potentiellement peuvent utiliser ces canaux. Ensuite imaginons que le déplacement de chaque pièce n’est plus préalablement défini mais peut se construire en temps réel en fonction de l’environnement. Nous laissons au lecteur le soin de conclure sur le nombre infini de combinaisons possibles pour prédire la meilleure utilisation qui peut être faite du spectre sur la durée, et sur la nécessité de posséder une intelligence pour gérer ce problème : c’est la conclusion à laquelle est arrivée J. Mitola en 1999. La dénomination « radio intelligente », que nous utilisons dans cet article, est connue dans la communauté internationale sous la forme anglaise « cognitive radio ». 2.3. La radio intelligente D’après la définition qu’il en a donnée, un système de radio intelligente est capable d’adapter son fonctionnement à l’environnement grâce à : • des capacités d’analyse de sa situation, • une intelligence qui permet de prendre des décisions appropriées par rapport à des critères établis ou appris, • des capacités d’auto-reconfiguration pour adapter sa fonctionnalité. 
Ce cycle intelligent, déjà bien connu dans les domaines de l’IA et des réseaux de neurones est représenté de manière simplifiée sur la figure 4. Le contexte actuel de convergence entre systèmes d’information et de communication contribue non seulement à l’insertion de fonctions de télécommunications dans un nombre croissant de produits, mais également à l’intégration de plusieurs moyens de communication dans un même système. Un PC standard comprend désormais une liaison Bluetooth (pour envoyer le son aux écouteurs, par exemple), plusieurs standards de réseau local haut débit (IEEE 802.11a, b et g) et a la possibilité d’intégrer une carte 3G. On comprend l’intérêt, si ce n’est la nécessité dans certains cas (téléphone portable), de prendre en compte dans la conception globale du système la multiplicité des moyens de communication par voie radio et de gérer le spectre et ces différentes interfaces air de manière intelligente. Il s’agit d’optimiser “ici et maintenant” l’utilisation de la ressource spectrale. C’est cette problématique de la gestion optimale du spectre qui est à l’origine des travaux sur la « radio intelligente » [2] [3] [4] [5]. Pour illustrer cette problématique, prenons une nouvelle image. Nous supposons que le lecteur connaît les règles du jeu d’échecs 2, et sait le nombre considérable de possibilités. Supposons maintenant, qu’un nouveau jeu d’échecs soit défini en remplaçant les 64 cases du damier par plusieurs centaines de canaux spectraux. Remplaçons les 32 pièces du jeu par plusieurs dizaines de signaux, qui 2 Comme une préoccupation essentielle actuellement est le coût de la ressource spectrale (rappelez-vous le prix exorbitant des licences UMTS) et l’optimisation de son utilisation, la radio intelligente est souvent réduite à l’optimisation de la ressource spectrale. Cependant, notre vision de la radio intelligente est plus générale que cela [17]. 
Pour expliquer notre approche, nous avons volontairement découpé en 3 grandes couches notre modèle de la figure 5 [16] : • Une couche de haut niveau, qui regroupe essentiellement la couche application, ainsi que les interfaces de type homme-machine. • Une couche intermédiaire dans laquelle on retrouve les couches transport et réseau. Exemple proposé initialement par Mitola dans [3]. REE N° 9 Octobre 2007 3 LA PERCEPTION DES CHAMPS ÉLECTROMAGNÉTIQUES AMBIANTS : UN 6ÈME SENS POUR L'HOMME ? Dossier • Une couche de bas niveau dans laquelle on retrouve les couches MAC et physique. L’ensemble de ces couches fonctionnent sur une plateforme radio logicielle (si possible idéale, mais notre modèle fonctionne aussi avec une plate-forme RLR). Ces plates-formes radio logicielles reposent sur une architecture matérielle d’exécution, qui généralement est hétérogène 3. Cette plate-forme est abstraite à travers une couche d’abstraction, qui offre une transparence en terme d’implantation de composants logiciels de traitement du signal que l’on y exécute. Dans ce modèle, nous avons dans la colonne de gauche fait figurer certains capteurs, cela sera rappelé plus loin (cf. section 3.4.1). Dans la colonne de droite sont cités les domaines de recherche relatifs à la couche en question avec lesquels la radio intelligente entretient des liens très étroits. Bien entendu comme notre objectif est d’optimiser le fonctionnement de ces 3 couches de manière intelligente, la radio intelligente aura aussi un lien très proche avec le domaine émergent de l’optimisation inter-couches. 3. La bulle sensorielle radio intelligente 3.1. Généralités Le concept proposé dans cet article consiste en une nouvelle approche dans laquelle le terminal voyage avec une “bulle” qui lui est associée. Cette bulle lui assure une sécurité de transmission, c’est-à-dire sans interférences avec les autres usagers. 
A tout moment le gestionnaire de la bulle connaît parfaitement tous les signaux entrants et sortants de la bulle. Grâce à une analyse fine de situations déjà vécues, le terminal peut prédire et anticiper les émissions radioélectriques, de manière à optimiser des critères prédéfinis. Figure 6. La Bulle sensorielle. Il s’agit à partir d’informations fournies par des capteurs de tout niveau d’analyser le “comportement” des 3 4 signaux entrants et sortants de la “bulle” et de prédire l’évolution de ce comportement. 3.2. Analogie avec la « bulle physiologique et psychologique » La première analogie que l’on peut faire est avec la “bulle physiologique et psychologique” humaine. Cette bulle est l’espace virtuel dont les dimensions sont fixées par les 5 sens. Une personne se sent à l’aise et en sécurité dans cette bulle, ou au contraire peut se sentir agressée si cet espace est violé. Elle connaît tout ce qui se passe dans cette bulle et traite de nombreuses informations concernant ce volume, qui lui sont fournies par ses sens. De la même manière, la bulle sensorielle radio intelligente est dimensionnée par les capteurs qui analysent les ondes électromagnétiques reçues. Suivant notre analogie, nous pourrions parler d’un sixième sens fourni par l’analyse de ces ondes électromagnétiques. Par exemple, un capteur peut détecter des trous dans le spectre (zones non utilisées à l’instant présent dans le lieu d’écoute), un autre peut déduire quel est le standard de communication le plus approprié (selon des critères qui restent à déterminer) pour effectuer telle connexion à tel service (voix, visioconférence, téléchargement…). La bulle sensorielle radio intelligente permet de transmettre ces informations aux couches hautes d’un terminal radio intelligent qui peut alors initier de manière autonome une phase de reconfiguration [15] afin d’adapter son fonctionnement à la situation, suivant des règles pré-établies ou apprises. 
Afin d’avoir un maximum de degrés de liberté pour s’adapter, on comprend qu’un système radio intelligent tire un grand bénéfice des technologies radio logicielles. 3.3. Analogie avec la « bulle du conducteur d’un véhicule » La seconde analogie que l’on peut faire est celle de la « bulle du conducteur d’un véhicule ». C’est une extension de la « bulle humaine » au contexte de l’automobile. Désormais, la « bulle » (sphère virtuelle) que l’on considère est celle qui se déplace autour d’un véhicule et qui est associée aux sensations de son chauffeur. Un conducteur doit avoir un certain nombre d’informations sur son environnement et comprendre leur implication par rapport à son activité de conduite. Ce sont ses sens qui lui communiquent ces informations. Dans notre contexte, l’information entrant dans la bulle est augmentée à l’aide de : • moyens de signalisation, de signaux émis par les autres véhicules et par l’infrastructure routière, • règles universelles associées à la conduite et appliquées par tous les conducteurs, • l’expérience associée à des situations déjà vécues et Hétérogène signifiant ici que la plate-forme d’exécution est composée de circuits DSP, GPP, ASIC, FPGA, ... REE N° 9 Octobre 2007 I La bulle sensorielle radio intelligente qui lui permet de prédire et d’anticiper les dangers. L’exemple suivant illustre davantage encore l’analogie. Le but d’un conducteur est d’aller d’un point à un autre sans accident, en respectant certaines contraintes de temps de trajet, de nombre de kilomètres, de prix,… Il effectue tout cela grâce à l’ensemble des éléments dont il dispose à chaque instant dans sa « bulle ». De la même manière, un terminal radio intelligent a pour but de transmettre un certain contenu sans erreur avec certaines contraintes en termes de qualité de service, durée, débit, prix… Nous voulons ainsi faire le lien entre la bulle radio intelligente et la bulle d’un véhicule. 
Il faut donc étudier des situations équivalentes et en déduire des règles pour la radio, en s’inspirant des règles existantes déjà éprouvées pour la circulation automobile. Par exemple, tourner à gauche ou à droite pour changer de rue est équivalent à changer de standard. Il faut dériver toutes les règles de la radio intelligente. Y a-t-il une priorité à droite par exemple ? Un challenge est d’étudier tous les capteurs capables d’apporter des informations pertinentes à un système radio intelligent afin qu’il dispose d’un maximum d’information. La complexité des traitements à effectuer par la couche physique d’un système radio intelligent requiert une architecture de gestion de reconfiguration performante car il est constitué d’une pluralité d’unités de traitement de types différents (DSP, FPGA, ASIC…). Le système doit en outre constamment être capable de s’adapter en fonction des besoins de l’utilisateur et des conditions de l’environnement (point d’accès, sélectivité du canal de transmission, interférences, disponibilité en termes de ressources spectrales). Le rôle de la bulle est de fournir tous les moyens à la couche physique d’un système radio intelligent pour qu’il puisse effectuer les changements nécessaires à son fonctionnement afin d’être adapté au mieux à son environnement. L’objectif de la RI est de donner l’autonomie aux systèmes vis-à-vis de leur utilisation du spectre. Un terminal est assimilé à un conducteur de voiture guidant son véhicule dans l’infrastructure routière, mais pour le terminal, c’est à travers le spectre fréquentiel qu’il se déplace. 3.4. Les capteurs de la bulle sensorielle Le terme capteur est ici utilisé dans son sens le plus général. Il correspond à tout moyen fournissant une information pertinente. Cela peut, bien entendu, être un capteur au sens classique du terme (microphone, caméra, antenne...) aussi bien qu’un algorithme de traitement du signal (estimation du canal, rapport signal sur bruit...). 
On peut classifier ces capteurs suivant 3 alternatives décrites ci-après. La première consiste à les classifier en fonction des couches de notre modèle simplifié (cf. section 3.4.1). La seconde possibilité est un classement en fonction de l’environnement considéré et est décrit en section 3.4.2. Enfin la troisième possibilité sera un classement proposé I en fonction de 2 types de cartes que nous définirons et utiliserons au chapitre 4. 3.4.1. Classification en fonction de la couche du modèle simplifié Nous avons, dans le paragraphe 2.3, déjà présenté notre vision de la radio intelligente en fonction d’un modèle simplifié en 3 couches. Nous proposons dans cette section un classement des capteurs en fonction de la couche considérée. La classification qui en résulte est présentée sur le tableau 1 suivant. Capteurs Couches Profil utilisateur (prix, opérateur, choix personnels), son, image, position, vitesse, sécurité,... Application et interface hommemachine « Handover » vertical inter et intra réseaux et standards, charge sur un lien, Transport, réseau reconnaissance de standards,... Type d’accès, modulation, codage canal, fréquence porteuse, symbole, estimation Liaison Physique de canal, lobes d’antennes, puissance, ressources matérielle disponible,.... Tableau 1. Classement des capteurs en fonction du modèle en couches simplifié. 3.4.2. Classification en fonction de l’environnement considéré Nous rappelons que, dans cet article, sont considérés comme des capteurs, tous les moyens permettant d’observer l’environnement du terminal et de renseigner celui-ci sur les modifications actuelles ou prévisibles de cet environnement. Nous proposons dans cette section de classer les capteurs en quatre grandes familles : chaque famille ciblant un type d’environnement particulier. Cette classification est présentée sur le tableau 2 suivant. 
Tous les stimuli reçus par les capteurs de ces différents environnements doivent être analysés et traités conjointement de manière à obtenir la meilleure décision possible. Dans ce tableau, nous avons illustré un cas particulier d’environnement utilisateur. C’est un exemple souvent cité dans la littérature du « Context Aware », il s’agit de l’application médicale d’aide au diagnostic en temps réel, pour des patients. Cet exemple, fait bien entendu appel à des capteurs particuliers, adaptés à l’application souhaitée. La liste présentée n’est bien sûr pas exhaustive et peut dépendre de la pathologie du patient. Dans ce même ordre d’idées, nous pourrions prendre d’autres exemples applicatifs tels que l’aide aux services de sécurité (police, pompiers). Dans ce cas d’autres capteurs se révéleraient nécessaires. REE N° 9 Octobre 2007 5 LA PERCEPTION DES CHAMPS ÉLECTROMAGNÉTIQUES AMBIANTS : UN 6ÈME SENS POUR L'HOMME ? Dossier Classes Capteurs Environnement électromagnétique Rapport signal à bruit, Occupation spectrale Détection de « trous » dans le spectre Propagation, Puissance reçue, Réponse impulsionnelle du canal,... Environnement réseau Nombre de points d’accès, Nombre d’utilisateurs, Charge sur un lien radio, Reconnaissance de standards, Opérateurs disponibles,... Environnement matériel Niveau de batterie, Consommation, Niveau d’utilisation des ressources (nombre de portes dans un FPGA, mémoire nécessaire) Environnement de l’utilisateur Capteurs audio et vidéo, Le profil utilisateur, Choix personnels Position, Vitesse, Température extérieure, Luminosité,... Cas d’école : contrôle médical Environnement de l’utilisateur Température de l’utilisateur Pression sanguine, Niveau de glycémie... Tableau 2 . Classement des capteurs en fonction de l’environnement. 4. Les cartes spatiales et spectrales Grace à l’information fournie par la bulle, nous pouvons construire 2 cartes. 
La première est une carte spatiale classique sur laquelle nous reportons l’information spatiale pertinente fournie par certains capteurs. La seconde carte que nous construisons est totalement artificielle. Nous y reportons toute l’information spectrale donnée par certains capteurs. Notre objectif est de construire une carte dans laquelle le terminal peut se déplacer dans l’environnement spectral. Nous souhaitons poursuivre nos analogies en imaginant une nouvelle analogie entre le terminal se déplaçant sur cette carte et un conducteur se déplaçant sur une carte routière. Un objectif complémentaire à ce niveau, serait de déduire de cette nouvelle analogie des règles d’utilisation du spectre à partir des règles du code de la route. 4.1. La carte spatiale Sur la carte spatiale est reportée l’information utile donnée par certains capteurs... Cette information est par exemple, la position des stations de base, la position des autres terminaux... Un exemple d’une telle carte est donné sur la figure 9. 4.2. La carte spectrale Comme déjà précédemment expliqué, l’information fournie par un grand nombre de capteurs va être mise à profit pour construire une nouvelle carte appelée « carte 6 REE N° 9 Octobre 2007 Figure 9. Un exemple de carte spatiale. spectrale ». La construction d’une telle carte requiert un certain nombre de règles. Ces règles sont à nouveau déduites d’analogies avec la carte spatiale. Un exemple d’analogie expliquant simplement et clairement cette approche est la suivante. Sur cette carte spectrale, les routes seront les standards. De la même manière qu’une autoroute permet d’écouler un trafic plus important qu’un chemin vicinal, un lien radio Wifi permettra d’écouler un débit plus important qu’un lien GSM. Pour cette raison la largeur de la route tracée sera significative du débit écoulé par le lien radio considéré. Nous avons donc établi de nombreuses règles de construction de cette nouvelle carte spectrale. 
Certaines sont données dans le tableau 3 ci- après. La carte ainsi construite peut être adaptée en fonction de l’application et des besoins des utilisateurs. Le terminal peut choisir de ne s’intéresser qu’à une partie de la carte en fonction d’un objectif prédéterminé. Cette carte est donc un outil représentant certaines des informations accessibles dans la bulle grâce aux capteurs. Quelques exemples de cartes obtenues avec des scénarios les utilisant sont présentés au paragraphe 5. Remarque : Il est très important de noter ici que cette carte spectrale évolue constamment en fonction du déplacement du terminal dans la carte spatiale. La carte spectrale a une existence limitée en temps et position. Carte routière Conducteur Véhicule Type de route Largeur de la route Nom de la route Route à sens unique Vitesse du véhicule Bifurquer( changer de type de route) Bifurquer sur le même type de route Route interdite Police Voie privée Limitation de vitesse Priorité à droite Embouteillage Carte spectrale Gestionnaire du terminal Terminal Type de standard Débit du standard Opérateur Diffusion Débit du terminal « Handover » vertical Changer d’opérateur pour la même standard Bande interdite Organisme de régulation Bande réservée Débit alloué limité Accès prioritaire à un standard Lien radio surchargé Tableau 3. Exemples de règles pour construire la carte spectrale. I La bulle sensorielle radio intelligente I UN EXEMPLE DE CAPTEUR : LE CAPTEUR DE RECONNAISSANCE AVEUGLE DE STANDARDS Pour illustrer cette bulle sensorielle intelligente nous présentons un exemple de capteurs que nous étudions actuellement [12]. Celui-ci, comporte les trois phases présentées sur la figure 8. Dans une première phase, le signal large bande reçu est analysé de manière très simple et très grossière de manière à déterminer les plages de fréquence contenant une énergie significative. Cette analyse est effectuée de manière itérative telle que le montre la figure 7. 
Ensuite dans les bandes sélectionnées une analyse beaucoup plus précise est effectuée, durant la seconde étape. Cette analyse permet d’accéder à des informations comme la bande passante du canal, la distinction entre signal mono et multi porteuse, le type d’étalement de spectre... Dans cette seconde étape, la bande passante du canal, qui a été précédemment identifiée comme étant un paramètre discriminant, sera déterminée suivant une méthode publiée dans [6], que nous avons améliorée en prenant en compte de nombreux standards IEEE 802.x apparus depuis [6]. Notamment, le résultat d’une analyse temps/fréquence permet de distinguer le saut de fréquence de l’étalement par séquence directe et est directement utilisé pour discriminer Bluetooth du Wifi IEEE 802.11b, deux standards qui ont des caractéristiques très proches [14]. De même, l’étude des cyclostationnarités sur la bande étudiée donne des informations sur l’absence ou la présence d’un signal de télécommunications [13], ainsi que sur les caractéristiques de ce signal éventuel (type de modulation...). Chacune de ces informations n’étant pas suffisamment significative en soi, une fusion de l’ensemble de ces informations est réalisée en troisième étape. Le résultat en sortie de l’organe de fusion est une décision la plus fiable possible sur le standard présent dans la bande considérée. La manière la plus simple de réaliser cette fusion est d’appliquer des règles logiques mais elle n’est pas très robuste. Des solutions plus évoluées basées sur des réseaux de neurones sont actuellement à l’étude. Figure 7. Détection itérative de l’énergie. Figure 8. Le capteur reconnaissance aveugle de standard. REE N° 9 Octobre 2007 7 Dossier LA PERCEPTION DES CHAMPS ÉLECTROMAGNÉTIQUES AMBIANTS : UN 6ÈME SENS POUR L'HOMME ? Figure 10. Choix du standard en fonction du coût. Figure 11. Equivalence voie privée: standard réservé. 4.3. 
Classement des capteurs suivant le type de carte La carte spectrale est un outil basé sur l’information de la bulle de manière à être utilisé par le terminal en s’inspirant de règles du « code de la route ». De nombreux capteurs contribuent à construire la carte spectrale. Le terminal peut utiliser d’autres informations de la bulle (telles que : le nombre de terminaux détectés, leurs positions relatives, la position des stations de base...) et positionner sur la carte spatiale ces informations, de manière à les utiliser intelligemment (conjointement ou non avec la carte spectrale). Nous pouvons donc classer les capteurs suivant le type de carte. Type de carte Capteurs Carte spatiale Direction d’arrivée des signaux. Position sur la carte. Position des stations de base et des points d’accès. Position et distance vis-à-vis des autres terminaux... Carte spectrale Fréquence porteuse, fréquence symbole, occupation du spectre, reconnaissance de standards, de modulation, de l’intervalle de garde, du codage, de l’accès, du nombre d’utilisateurs,... Tableau 4. Classement suivant le type de carte. 5. Quelques analogies avec des situations routières Nous présentons dans ce paragraphe trois exemples, très simples, d’analogies entre un véhicule confronté à une situation routière particulière et un terminal confronté à une situation analogue dans la nouvelle carte spectrale telle que définie précédemment. Le scenario de la première analogie décrite sur la figure 10 consiste à faire un choix entre une communication à haut débit et un coût faible de transmission. Comme cela a été défini dans le tableau 3, la largeur de la route sur la carte spectrale est directement reliée au débit du standard. En fonction des besoins utilisateurs, le terminal choisit ici la route la moins chère, celle qui correspond à l’opérateur 3. 8 REE N° 9 Octobre 2007 Figure 12. Equivalence embouteillage=encombrement spectral. 
La seconde analogie de la figure 11 considère une route débouchant sur une voie privée interdite. Pour notre terminal cela signifie que ce standard est réservé. Il peut s’agir, par exemple, d’un réseau professionnel. Dans cette situation le terminal bascule obligatoirement sur la voie de l’opérateur 1. Dans le troisième scénario présenté sur la figure 12, une équivalence entre un embouteillage routier et une surcharge d’un lien radio est présentée. Dans cet exemple, nous avons à la fois la diminution de vitesse imposée par l’embouteillage (équivalent à la diminution du débit du terminal liée à la diminution de la ressource disponible) et le fait que le terminal change de route donc de standard (en fait ici, d’opérateur) pour éviter cet embouteillage. 6. Conclusion Nous avions conclu un précédent article REE sur la radio logicielle [7] par la phrase suivante : « Enfin, une orientation importante à nos yeux est la volonté d’aller vers une RL de plus en plus « intelligente » (la Cognitive Radio en anglais) ». C’est cette radio intelligente que nous avons présentée dans cet article, à l’aide d’un nouveau concept : la « bulle sensorielle ». Celle-ci, grâce à de nombreux capteurs de toute nature, fournit des informations qui sont utilisées par le terminal intelligent. Nous avons expliqué comment il peut, par exemple, adapter son comportement à l’aide d’une carte spectrale construite à partir des informations fournies par la bulle. À l’aide d’analogies routières, nous proposons de déduire des règles de « déplacement » sur cette carte spectrale. Cela est en cours d’étude et fera l’objet de futures publications. 7. Remerciements Ce travail a été effectué dans le cadre du projet E2R II qui est financé par le Sixième Programme Cadre de la Commission Européenne. Ce document n’exprime que le point de vue des auteurs et n’engage en rien la Communauté dans l’utilisation qui peut en être faite. La contribution de collègues du consortium E2R II est ainsi remerciée. 
Les auteurs remercient aussi la Région Bretagne, pour le support financier apporté à ce travail. Références [1] I La bulle sensorielle radio intelligente J. MITOLA, “The software Radio Architecture”, IEEE Communications Magazine, May 95, pp. 26-38. [2] J. MITOLA, “Cognitive Radio: An Integrated Agent Architecture for Software Defined Radio”, Ph.D. dissertation, Royal Inst. of Tech., Sweden, May 2000. [3] J. MITOLA & al., “Cognitive radio: Making Software Radios More Personal”, IEEE Personal Communications., vol. 6, no. 4, pp. 13-18, August 1999. [4] S. HAYKIN, “Cognitive Radio: Brain-Empowered Wireless Communications ”, IEEE Journal on Selected Areas in Communications, vol. 23, no. 2, pp.201-220, February 2005. [5] F. K. JONDRAL, “Software-Defined Radio-Basics and Evolution to Cognitive Radio”, EURASIP Journal on Wireless Communications and Networking 2005:3, 275-283. [6] C. ROLAND, J. PALICOT, “A New Concept of Wireless Reconfigurable Receiver”, IEEE communications Magazine, July 2003. [7] J. PALICOT, C. ROLAND, « La Radio logicielle : Enjeux, contraintes et perspectives », REE n° 11, décembre 2001. [8] http://www.wireless-world-research.org/ [9] http://www.gnu.org/software/gnuradio/ [10] T. HENTSCHEL (Dresden University of Technology), SORT, Contribution : SORT/WP3100/TUD/002. [11] TRUST, IST-1999-12070, D3.1.1, “Proposal and Initial Investigation of Certain Know and Augmented Analogue Signal Processing Techniques for Future Flexible Transceiver architectures”, 29 September 2000. [12] R. HACHEMANI, J. PALICOT & C. MOY, “A New Standard Recognition Sensor for Cognitive Radio Terminal”, EUSIPCO’07, Poznan, Poland, September 2007. [13] M. GHOZZI, M. DOHLER, F. MARX & J. PALICOT, “Cognitive Radio: Methods For The Detection of Free Bands”, Comptes-Rendus Physique, Elsevier, volume 7 September 2006, pp 794-804. [14] M. GANDETTO, M. GUAINAZZO & C. S. REGAZZONI. 
“Use of Time-Frequency Analysis and Neural Networks for Mode Identification in a Wireless Software-Defined Radio Approach”, Eurasip Journal of Applied Signal Processing, Special Issue on Non Linear Signal Processing and Image Processing, 13:1778–1790, October 2004. [15] L. GODARD, C. MOY & J. PALICOT, “From a Configuration Management to a Cognitive Radio Management of SDR Systems”, IEEE CrownCom’06, 8-10 June 2006, Mykonos, Greece. [16] A. NAFKHA, R. SÉGUIER, J. PALICOT, C. MOY & J. P. DELAHAYE, “A Reconfigurable Base Band Transmitter for Adaptive Image Coding”, IST Mobile SUMMIT 2007, 1-5 July 2007, Budapest, Hungary. [17] C. MOY, A. BISIAUX & S. PAQUELET, “An Ultra-Wide Band Umbilical Cord for Cognitive Radio Systems”, PIMRC’05, Berlin, September 2005 (IEEE Personal Indoor and Mobile Radio Communications). Les auteurs Jacques Palicot a obtenu en 1983 le diplôme de Docteur en traitement du Signal de l’université de Rennes. De 1988 à 1991, il a travaillé sur les techniques d’égalisation appliquées aux nouveaux systèmes de télévision améliorée, pour le centre de recherche de TDF à Rennes : le CCETT. Ensuite, depuis 1991 ses études concernent le domaine des communications numériques et tout particulièrement le domaine du traitement adaptatif du signal. Depuis 1998, il s’est particulièrement investi dans les nouvelles technologies connues sous le nom de “Software Radio” et « Cognitive Radio ». Depuis octobre 2003, il est Professeur et responsable de l’équipe de recherche Signal Communications & Electronique Embarquée (rattachée à l’IETR, entité CNRS-6164), à SUPELEC sur le Campus de Rennes. Rachid Hachemani a obtenu le diplôme d’ingénieur en électronique de l’université des sciences et de la technologie d’Oran en 2003. Il a obtenu en 2006 le diplôme de Master2 recherche MARS (Systèmes et réseaux, Architecture et micro-technologie) de l’Institut National des Sciences Appliquées de Rennes. 
En thèse de doctorat à l’École Supérieure d’Électricité : Supélec, au sein du laboratoire SCEE, il travaille actuellement sur le domaine de la radio logicielle et de la radio intelligente. Une partie de ses travaux de thèse sont une contribution au projet Européen E2R (End-to-End Reconfigurability). Christophe Moy a obtenu en 1995 les diplômes d’ingénieur et de DEA de l’INSA de Rennes. Il a effectué son doctorat dans le domaine des techniques à étalement de spectre et a obtenu le titre de docteur de l’INSA de Rennes en 1999 dans la spécialité électronique. Il a ensuite travaillé 6 ans dans le laboratoire de recherche européen de Mitsubishi Electric à Rennes. Il a participé à la création et à l’activité de l’équipe « Software Radio » et a été le représentant de Mitsubishi Electric au SDR Forum de 1999 à 2004. Depuis 2005, il est enseignant/chercheur à Supélec, sur le campus de Rennes, et effectue ses recherches dans l’équipe SCEE de Supélec. Il est impliqué dans les activités de radio logicielle (« Software Radio ») et de radio intelligente (« Cognitive Radio »). Il a participé et participe encore à de nombreux projets collaboratifs tels que le projet intégré E2R et le réseau d’excellence Newcom de l’IST au niveau européen, les projets RNRT A3S, IDROMel et RNTL Mopcom au niveau national et du pôle de compétitivité Images & Réseaux de Bretagne. REE N° 9 Octobre 2007 9 '- $ "#0 - 504 - " , '< 0 '#0. .$ ,$ # #$ 03 1$ $ ! 1 " '- '- " '- $ "#0 - 504 - 6 "' " 1 9 9 2 < >& 6 + 2 6 1$ $ ! 1 " '- '- " " A < .$ ,$ # #$ 03 6 > > > A > " > A 2 9 > > < + 2 9 > < 6 < 2 + 2 9 E 9 9> < 6 & 96 9 < ( '$ / 6 A >& 5 ,$ 03 '9 > 6 A > < , A 6 + + < 6 N K 9 G + 9 K <6 > < 9 & > > K +6 6 + 6 P, 9 ( 1 K A H 2 9 6 6 A 9 G > ( & , 6 + 9 > > 6 6 > 6 ( 6 )< A K 9 % ) " , '< 0 '#0. > > A 6 9> 6 H< 9 9 < % K 9 6 2 6 < 9 6 92 H < ( & 9 6 + + 6 > 2< 6 < 6 < 2 ,9 + > 9 0 ( 2+ K < G 9 $# 6 )< % O & '- 504 $ "#0 - - G & 5 2 " > + > , ' % & ' % '- 9 > '9 6 > > > A9 . 
$ A A < < 6 G H " , ' A9 + + + > ' 5 > + 6 < 6 < C 6 , > & G' 9 6 " ( & H 6 <' "( & ) =& 6 > > >& =" 5 9 > > 5 " > 9 9 9 < 5 9 6 6 5 Q > > > > ' & > ,9 *9 "1 / > > 9 $ A , 2 & , ' > " ( 2 , ' 2 < '" % " ' > '- " 6 <A 6 1 " 6 > <6 > " 1$ $ ! > + > > " .$ ,$ # #$ 03 9 B '9 96 > 6 ( , ' " 9 ) % , 9 - , , ' "1 ' ) & % 9 / = 6 6 < 96 '" 1 9 6 " , ' >& 2 " > ) ) 2 "2 % " , ' 1 ( ) 2 % 9 1 9 A A 9 9 < & 9 = B $ 5 3-$ %, & 9 8 > , & A (, =& = 6 9 A ( & ,96 + < " & ' A ) '" ( > 6 " , '< 0 '#0. % < 9 O 69 9 F '- 504 $ "#0 - - H &; % # @ 7! # < > > 6 , & #' % ( < 6 > 9 & .$ ,$ # #$ 03 1$ $ ! $ 1 " ! &;% > 6 L 9 < 2 ' 6 9 , < 2 ' 9 6 < 6 >6 J 39 > 6 > > ) 6 < 9 < > > + + , ( ' 6 , A K 9 > 9 $ < O > 6 < ( & 2 6 % < < M 9 ' '- " < ' > '- > 6 < 9 6 > 6 % > > '9 > > ( & '9 5K < 9 K ,9 9 69 ( ) 6 + + 6 2 9 6 & < 9 9 9 K ( 6 9 < 9 Q 2 % 69 ' 69 9 < + + 2 &6 H 6 P # & G ,9 ) 3 + < 6 A 2 9 6 > < < 6 / 2 6 $ 9 6 6 9 9 9 ( 6 ( , + 6 69 A < & 9 > , 6 " , '< 0 '#0. 2 K > = '- 504 $ "#0 - - I .$ ,$ # #$ 03 1$ $ ! 1 " '- '- " "% ! , 2 & $ 2 < ( & 2 6 < 6 N$ 9 > 6 2 ' 9 < 6 6 9 2 < 69 & 6 , 9 > 6 < 96 "' ( 9 6 & 9 "' %; > " > < 6 9 6 A < > 96 < 6 6 >& '9 < 9 > ) 6 , 6 A 96 , * 96 > 2 -1 > 6 ( < > > ,$ '0# . 6 \< < ' + & & & ' & > $ 9 > 9 2 9 9 6 # 2 6 > 9 2 9 ' + 9 > 9 > 2 > 9A 6 2 6 2 2 A9 6 9> 6 9 A < 9 2 < 2 < >& > 2 6 * 9 2 < 6 2 < 2 2 9 > > ,92 6 K > 9 > 9 *9 O ( > 9 > 5 > A < 6 69 ,9 > > < 1 1 5 9 <6 + 3 > ; / 6 " > , '< 0 '#0. > 9 ( 9 '- $ "#0 - 4 504 - .$ ,$ # #$ 03 1$ $ ! 1 " '- '- " 0 A >& " () '*+ % , '< 0 '#0. ' " ' * @ +0 : # " 0 5 $ 0 (1#/#"("# / +< +" 1 ! ( 3# #) 3 +0 +0 > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < 1 High-Level Design for Ultra-Fast Software Defined Radio Prototyping on Multi-Processor Heterogeneous Platforms Christophe Moy, Mickaël Raulet Abstract—The design of Software Defined Radio (SDR) equipments (terminals, base stations, etc.) is still very challenging. 
We propose here a design methodology for ultra-fast prototyping on heterogeneous platforms composed of GPPs (General Purpose Processors), DSPs (Digital Signal Processors) and FPGAs (Field Programmable Gate Array). Lying on a component-based approach, the methodology mainly aims at automating as much as possible the design from a functional verification to a multiprocessing heterogeneous implementation. We insist that a complete HW/SW co-design approach is still currently not viable with any technique. The proposed methodology is based on SynDEx CAD design approach originally dedicated to networks of multi-GPPs. We show how this has been changed to make it first coherent with an embedded context of DSP for instance. The implication of FPGAs is then addressed and integrated in the design approach with very low restrictions. Apart a manual HW/SW partitioning indeed, all other operations may be kept automatic in a heterogeneous processing context. The targeted granularity of components to be assembled in the design flow is that of the size of a FFT, a filter or a Viterbi decoder for instance. The re-use of third party or pre-developed IPs is a basis of this design approach. Moreover, the methodology is capable of mixing in the same design the processing of radio as well as application layers, merging and factorizing the most computation-demanding processing elements of an SDR equipment. In addition, the proposed design methodology is not restricted to SDR system design but can be useful for any real-time embedded heterogeneous design in the context of prototyping. Index Terms—Software-defined radio, design methodology, heterogeneous platform, cross-layer design T I. INTRODUCTION HIS paper proposes a method that meets most of the requirements for the design of Software-Defined Radio (SDR) equipments. Research on SDR aims at investigating all the topics that could help improving enabling technologies for future radio systems [1] [2] [3]. 
We address in this paper the particular topic of SDR equipments design, which is still a Manuscript received March 7, 2008. C. Moy is with SUPELEC/IETR, Rennes, France. (phone: +33 2 99 84 45 84; fax: +33 2 99 84 45 99; e-mail: [email protected]). M. Raulet is with INSA/IETR, Image Group, Rennes, France. (e-mail: [email protected]). very open subject. This is also very similar to any real-time embedded heterogeneous design issue such as image processing for set-top boxes for instance. SDR design is indeed a co-design issue that is not restricted to SDR, but also concerns most of embedded real-time equipments. If we could discriminate SDR from other equipment categories, we could say that SDR brings the flexibility paradigm at its highest extremity. That is why SDR equipments are expected to be composed of a heterogeneity of flexible processing components, such as DSPs, GPPs, FPGAs and ASICs. Moreover, the flexibility is not only considered at design time in SDR systems, but also at running time. An SDR system consequently should be capable of taking benefit from the potential reconfigurability of the flexible components it is composed of. This implies an adequate design methodology that transforms this potential reconfigurability in effective flexibility at run-time [4]. The existence of a reconfiguration management architecture at the level of an equipment, and adequate reconfiguration management infrastructure at the system level [5] must not be forgotten. However reconfiguration management is not directly included in the design methodology of this paper, but we show how the approach is compatible with its insertion, in a future step. This assertion relies on our deep experience on the reconfiguration management issue in [6] and [7]. Reves et al. in [8] are also investigating ways to solve these challenges with the same philosophy. There are several ways of considering SDR design. 
The approaches can be divided into two distinct categories: - fast prototyping for lab demos, - commercial equipments design. Time has not come yet to consider the second category as realistic now. Only sub-parts of the design flow are achieved and they are mainly common with the prototyping flow. We particularly address in this paper the topic of fast prototyping. Usual heterogeneous embedded design approaches mix several independent tools dedicated to hardware or software, which requires a long effort and is error-prone. It is to answer this issue that we tackle the problem from a higher level in order to bring automation and acceleration into the design process. The paper is organized as follows. The requirements of heterogeneous co-design as addressed in SDR area are > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < explained in section II. But there are many ways of addressing the concerned co-design issue. This is explained in section III of this paper and section IV extracts the main features of a realistic flow. Section V proposes a possible instantiation of this flow and illustrates how the proposed heterogeneous approach answers the requirements of section III. Finally, SDR design examples based on the proposed approach are given in VI, before drawing some conclusion. transmission side. Consequently, multi-processing is an intrinsic requirement of SDR to speed up the required computations. RF II. SDR DESIGN ISSUES SDR design is very challenging and no solution covers all the expectations of radio engineers. This section intends to list the main issues to be solved. It can be noted that other embedded systems have the same requirements, which does not restrict our approach to SDR. Image processing for example is completely in the scope of the proposed methodology. 
The ideal SDR design methodology should offer a set of features facilitating the design of systems having the following main characteristics: - flexibility, - multi-processing, - heterogeneous processing, - object-oriented design facilities, - HW-SW co-design, - embedded constraints, - signal-processing simulation, - hardware abstraction. Moreover, portability and re-usability are a constant concern, as well as the elevation of abstraction in order to simplify the global design process. This list is not exhaustive but it is sufficiently challenging to be considered as a very ambitious target. A. Flexibility The salient feature to be considered first for SDR is flexibility. All the previously listed elements have to be considered in light of this keyword. Radio design has been based for more than a hundred years on analog electronic components, which imposes a pre-defined transmission scheme. SDR is the answer to this limitation thanks to last technology progresses. Flexibility is achieved through a digital approach. The radio applications become digital (the ideal would be software) and can be played in any hardware platform. The radio application (software here) consequently can be changed, just like a piece of software that can be changed on a computer. This is the end of a century of fixed-behavior radio. This is the software radio era. B. Multi-processing SDR is defined as an underdeveloped version of the ideal Software Radio of Fig. 1 [1]. Software Radio encapsulates very demanding digital signal processing capabilities since it brings closer to the antenna the analog-to-digital conversion at the reception side, and digital-to-analog conversion at the 2 RF Fig. 1 – Ideal software radio equipment architecture Software radio is currently not realistic except for low carrier frequency transmission systems. 
More often, at least a stage of frequency translation from radio frequency (RF) to intermediate frequency (IF) or baseband (BB) is done in analog between the antenna and the conversion. This pragmatic approach illustrated in Fig. 2 is called Software-Defined Radio or SDR. IF RF BB IF RF BB Fig. 2 – A realistic SDR equipment architecture Even if it relaxes the digital processing components, this still remains very challenging. The constraints are so strong that the direct approach of Fig. 2 may fail and imposes sometimes to investigate signal processing alternative solutions in the most demanding contexts, as for UWB for instance in [9] and [10]. Nevertheless, in the most relaxed situation of baseband digitization, digital operations are timed at a multiple of the signal bandwidth, which can reach tens of MHz in a single band terminal for UMTS, and hundreds of MHz for multi-band base-stations. A multiple of GHz will soon be required for multi-standard SDR base-stations or UWB transmissions. Most demanding operations are those that are over-sampled compared to the symbol rate, as filtering at the exit of the transmitter or the entrance of the receiver, as well as the very complex algorithms in reception, such as turbo decoding. Consequently, digital processing architectures are very complex and combine several devices of the same nature or several kinds of processing devices. This last consideration is the topic of next paragraph. But multi-processing, without heterogeneity, is a topic in itself. An SDR design methodology > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < must provide facilities to map and program a distributed system. Designing SDR equipment means allocating software elements, composing the radio application, on the hardware devices of the equipment. It has to manage communications as well as processing. It has to schedule communication and processing periods of each processing unit. 
This includes means of scheduling prediction and optimization in order to perform an automatic mapping and scheduling of the SW application to the HW platform. Moreover, from this follows the demand of a generation of the communication glue associated with the processing operations. C. Heterogeneous computing The heterogeneity of devices indeed permits to take benefit of the different advantages of each category of processing components. Let us keep in mind that flexibility is of major interest for SDR. Consequently, most popular processing devices in SDR design are the following: - GPPs, - DSPs, - FPGAs, - digital ASICs, - analog ASICs. As explained earlier, analog ASICs are still necessary. SDR can not get away from antennas, amplifiers and analog filters which is a condition of the transformation from the electric to the electro-magnetic world. Digital ASICs and FPGAs are able to process heavily parallel computing. ASICs are a hardware solution particularly suited for low power, very high speed computing. But they are dedicated to high quantity markets in order to compensate the chips design cost. FPGAs provide some of the hardware (HW) capabilities at lower buying and developing cost. But FPGAs offer HW flexibility, that is most important for SDR. Two levels of flexibility may be considered for FPGAs. First a flexibility at design time which permits to make several designs in a FPGA and run them at different periods of time. This has been the usual manner to use FPGAs for years and it is very profitable. But a new opportunity is given by FPGAs now, which consists in dynamically reconfiguring a sub-part of the gates of the FPGA while the rest of the gates can pursue its processing. Delahaye et al. have defined and validated a design methodology as well as flexible processing elements design approaches particularly interesting for SDR using partial reconfiguration of FPGAs [11]. 
Other domains, such as cryptography [12], are also investigating this approach, but this is out of the scope of this paper. DSPs offer the best trade-off between processing power and power consumption [14]. They are particularly well suited to the SDR context, all the more so as manufacturers integrate more and more co-processing capabilities dedicated to radio signal processing, such as Viterbi decoders in the TI C6416 for instance. Their capability to be programmed in C language makes them really relevant for SDR, enabling portability from one DSP device to another. GPPs have become a potential alternative over the last few years with a tremendous increase in their computation power. GPPs have the advantage of supporting high-level programming languages, as well as highly advanced operating systems. This last feature may be helpful for portability but also for reconfiguration management. D. HW-SW co-design The choice of the distribution of the processing elements between the different categories of processing devices has to be made depending on the capabilities of each category of components in terms of processing power, but also power consumption, heat dissipation, cost, etc. This topic is very complex and no automatic solution exists today. It remains mostly based on engineers’ experience. The main challenge is to separate software (SW) from hardware (HW). This should be understood in the sense that we call SW something that runs on a processor, and HW what is executed on an FPGA. We keep this distinction in the rest of the article. By HW-SW co-design, we restrict ourselves here to the capability of the design flow to integrate at a high level processing elements of different nature in code, such as C or VHDL, to be run in different categories of processing components, such as GPPs, DSPs, FPGAs or ASICs. 
We also take into consideration the specific case of parameterizable ASIC since, if the processing is fixed, the configuration of their parameters may be done on a DSP within a C function. In this sense, an ASIC is considered as a processing unit running a dedicated functionality. E. Object-oriented design facilities SDR is an evolution of the radio design. Radio design is coming from the electronics field which is very much linked with the notion of hardware component. This is mainly due to technology reasons. Electronics components have been for a century the only bricks available to build a radio. On the other side, computer engineering integrated the electronics paradigm of “components” in the SW domain. This makes a componentbased design approach [15] definitely suitable for SDR design demand [6]. Advantages are numerous, but let us highlight the following at least: modularity, portability, replacement by reconfiguration, re-usability, etc. The reason is the componentbased approach brings a natural separation between signal processing and execution architecture. The object-oriented approach indeed gives the opportunity to increase the level of abstraction of the design. This has two main consequences. First, SDR design may take benefit of the latest progresses in high-level design. The use of UML for embedded systems design in general [16] and SDR design is an example [17]. Secondly, we argue that the temptation of inventing a completely brand new design flow for SDR must be avoided. SDR design must be based on other design technologies in order to take benefit from their advances. In that sense, we defend the following approach for SDR design. An SDR design flow has to give the opportunity to integrate processing elements (let us also call them IPs here, for Intellectual Property) made by other specialized designers or tools. 
Then SDR design may be seen as a kind of IPs > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < integration process. This relaxes the SDR design flow from the very intimate processing elements or IPs design and gives a higher level position to the designer thanks to a componentbased approach. This permits to consider signal processing elements as black boxes with very general characteristics, such as for instance execution time, mean power consumption, memory use, etc., and not as a succession of atomic operations, or a set of gates. Another advantage, if not the most important is that it permits to keep the optimality of each IP design thanks to dedicated tools for each domain, as a synthesizer for a FPGA, or a compiler for a DSP. It looks unfair to pretend making a better tool than the specialists of each of these domains. Moreover, this opens the possibility to benefit from third party developers for IPs. This announces the proliferation of IPs integrators on generic platforms in the future. The re-use of IP, moreover, is a guarantee of reliability. It also gives the opportunity to benefit from the use of IP, the content of which is protected. F. Embedded constraints Embedded constraints can be the following: - hard real-time, - memory limitation constraints, - power consumption, - cost, - size. In the SDR context, first step to overpass is often to solve hard real-time data flow constraints. It often implies to consider memory optimization as well. These two points are the state-of-the art of rapid prototyping approach, and are of course also to be considered in commercial equipment design. Real-time guarantee is mandatory. It may consequently be not preferable to rely on real-time operating systems (RTOS) in that context, compared to a static scheduling of operations. 
Power consumption, as well as cost and size are not considered in a prototyping approach but are for commercial systems, and consequently are not in the scope of the approach proposed in the paper. A long term goal is to integrate these considerations from the very beginning of the design phase, which is currently done manually. Research studies on power consumption are numerous [2], regarding the importance of the topic. Some are based on a prediction [18] or a measure of the power [19]. Attempts to help taking into consideration several of these parameters at the very early steps of a design flow have been proposed [20] but without providing an integrated resolution method yet. G. Signal-processing integrated simulation Another required feature of an SDR design flow is the capability to simulate the system in order to check its functional and non functional behavior. A major point to be stressed is that there is almost no interest for such a simulation if the conformity between the simulated version and the final version installed in the real system is not guaranteed. If not, the checking will have to be done again in the platform. An alternative is to give the possibility to implement very 4 fast the system on the hardware platform and directly run the real system to check it in-situ. H. HW abstraction Abstraction layers may have several roles in an SDR system. At design time, it may be seen as a technique that enables to abstract the signal processing IPs from the hardware target, as a Java code that is planned to run on a Java virtual machine. This is not very popular for SDR as it may decrease processing performance too much. Another approach is to take benefit from a higher level design approach to hide some details of the hardware implementation. This may be achieved through the use of only few features for the characterization of IPs execution on this hardware. 
This permits to speed-up design phase and direct implementation on the platform, where the very intimate measurement may be done with exact results. Another abstraction is acting at run-time. This may be done thanks to a layer of middleware. The ultimate proposal in that sense is the Software Communication Architecture (SCA) selected by the US Department of Defense (DoD) in the JTRS (Joint Tactical Radio System) program [21]. A CORBA (Common Object Request Broker Architecture) software bus is proposed in the SCA to support the abstraction of the hardware for the software. This is a major source of concern for the radio design community. Solutions trying to go round the full-CORBA approach while supporting some SCA compatibility are numerous [22]. They mostly consist, for realtime signal processing, in bypassing CORBA which should definitely be restricted to reconfiguration management. As explained earlier, reconfiguration management is not in the scope of the current paper but has to be taken into account, as attempted in [8]. I. A design approach larger than SDR domain The list of properties for SDR design approach has many commonalities with other application fields. This means that solutions targeting SDR design may also be applicable to other domains. In the context of image processing in particular, all listed characteristics are valid, even flexibility. This is particularly relevant regarding the very last versions of video coding schemes from MPEG standardization groups. MPEG-4 for instance targets a large scale of coding/decoding schemes which implies flexibility by nature. This requirement is also stressed at its maximum in last RVC (Reconfigurable Video Coding) where reconfigurability is a goal itself [23]. 
Another example is the need to solve massively heterogeneous multiprocessing issues of the networking infrastructure implementing transcoding of video and audio formats in order to broadcast multimedia contents on different media types (from high definition HD TV to handheld DVB-H). > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < III. CURRENT AND INNOVATIVE SDR DESIGN APPROACHES A. Today: no existing solution Regarding the design flow requirements listed in the previous section, there currently exists no integrated solution for the design of SDR systems and it may be expected there will never be. Consequently, a set of solutions have to be addressed separately and combined. We would rather expect the use of a new tool combined with already existing tools dedicated to sub-parts of the design. We propose here to give a summary of the potential solutions that can be expected, on the basis of the state-of-the-art in terms of already existing languages and commercial proposals. B. Co-design languages If we consider the SDR design issue as a co-design issue, let us consider first the languages that have been created these last 10 years for that purpose. We may refer to SystemC [25], HandelC from Celoxica, and CatapultC from Mentor Graphics to speak about the most used ones. Initial goal was to provide designers with a completely integrated flow for the joint design of the SW, namely the coding of the processors, and the HW, namely the coding of the FPGAs. By integrated it means using a unique language that would be compiled for the SW part of the system and synthesized for the HW part after a joint edition, simulation, debug and validation process. However, SW and HW worlds are intrinsically different if not opposite, which makes it very challenging. Another idea was to facilitate the HW design while getting rid off the VHDL coding and turning it to C-like. 
The previously cited languages proposed a C-like programming method for HW while creating C++ libraries expressing the needs of HW design, such as binary and vector data types, or the possibility to express parallelism. But not all C++ code can be converted to HW, which makes many constructs impossible to synthesize. The restrictions are so severe that, to exaggerate, one ends up writing VHDL in C++. Moreover, this approach is far from providing the most efficient FPGA design in terms of processing speed, surface occupation and power consumption. This finally makes it suitable for co-simulation, but as the HW part has to be manually redone, this completely breaks the design flow as the validation at high level is not guaranteed. We must insist here on the following point: a 95% synthesis of the SystemC code for a HW target is not enough if it takes a huge amount of energy to solve the 5% remaining issues, which is often the case. Finally, these co-design languages are often restricted to transfer level simulations at their best capability. C. MathWorks A very popular method for fast prototyping is proposed by MathWorks, based on Simulink. The first solution was proposed in the early 2000s in association with LYRtech, an SDR platform provider [26], and has been generalized to many other providers since. It consists in providing a direct bridge between a Simulink environment for signal processing simulation and the execution on a heterogeneous hardware platform composed of DSPs and FPGAs. The link from Simulink to DSPs is obtained through Real-Time Workshop from MathWorks. Concerning the HW side, Xilinx is providing a set of FPGA IPs which are guaranteed to be cycle-accurate equivalents of the IPs provided in Simulink. It provides an artificial translation made from Simulink to the FPGA. This faces the following restrictions. It is firstly dependent on the existence of the adequate APIs (Application Programming Interfaces) for each platform. 
However if this solution is generalized, all platform providers will make the effort. Secondly and most important, HW domain is restricted to Xilinx, and even a sub-set of Xilinx since only boxes having the dual Simulink/VHDL IPs are available. Note that it does not prevent any designer to make its own IP with this technique. The guarantee of the equivalence is under the responsibility of each company then. Despite the above mentioned concern, MathWorks is offering the most effective solution to SDR designer in the short-term. Just a lack of a higher modeling introductory work at specification level in the design flow may be regretted. However, it really permits to implement a heterogeneous design from functional simulation in Simulink environment to the implementation. It is in this sense a signal processing based approach. D. High-level design At the other extreme, and with longer term objectives, we may refer to computer-science oriented approaches based on high-level modeling, such as UML (Unified Modeling Language) of the OMG (Object Management group). UML is another way to interpret co-design. As UML indeed is dedicated to help in the modeling of any kind of system, even outside engineering domain, then why not for mixed SW and HW? In addition, systems in general are becoming more and more complex. Hardware and software designers (in its usual sense here and not in the sense of differentiating processors from FPGA designers) have to cooperate in larger and larger projects. This is provoking a need for a global system design methodology. The engineering answer has been MDE for Model Driven Engineering, and the methodology, MDA for Model Driven Architecture [29]. This approach is mainly based on UML concepts. Another feature is the two steps PIM and PSM. PIM stands for Platform Independent Model and corresponds to a modeling of the application independently from any implementation consideration. 
Then PSM, for Platform Specific Model, adds new characteristics to the model in order to take into account the HW platform of execution. MDA, which was introduced in 2000 by the OMG, is now quite a successful concept. An attempt to integrate SDR design in a UML methodology was made in the A3S project [30][29] for the modeling and prediction of non-functional characteristics of such systems. A metamodel for SDR has been derived for the generation of a UML profile named “A3S profile” [20]. A profile is a way to extend UML concepts for a specific domain. Practically, it consists in customizing a UML > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < environment to a specific domain of application (and getting rid of all the unnecessary concepts). The OMG has already standardized several metamodels for embedded real-time systems. The latest is MARTE [16]. New research projects on SDR are inspired by MARTE, such as Mopcom [27], which aims at defining within two years a design flow to automatically generate VHDL code from high-level UML-based modeling [28]. E. Other approaches Other design approaches for SDR have mainly oriented their solution towards a specific hardware implementation target, while offering the inherent flexibility of SDR design. One solution is to propose a processor customized for SDR. QuickSilver Inc. has designed a hardware chip and associated software development tools. The Adapt2000 ACM System Platform claims to integrate in a single IC, the Adapt2400 ACM (Adaptive Computing Machine), the capabilities of ASICs, DSPs, FPGAs and micro-processors [31]. The Adapt2400 is comprised of four distinct heterogeneous node types and a node wrapper. Complementing the Adapt2400 Architecture is the InSpire Node Software Tool Set, which abstracts away the complexity of the heterogeneous, multinodal, multi-tasking ACM architecture under a single unified programming model. 
Another choice may be to favor raw performance (computation speed, power consumption). A promising approach in this field is the use of systems on chip (SoC) that gather several (more or less dedicated) processing units inside a single chip. The generalization to a high number of units, as required in SDR systems, leads to networks on chip (NoC). A NoC can be differentiated from a SoC in the sense that the number of units becomes so high that communications between the units themselves become an issue and need their own processing units. A very advanced work in the domain of SDR is the FAUST (Flexible Architecture of Unified System for Telecommunication) chip from CEA [32]. It was originally designed to support potential 4G candidates based on MC-CDMA modulation schemes [33]. The flexibility was a necessity for supporting several ranges of parameters in order to evaluate several possibilities and to incorporate flexible schemes for 4G. This included real-time reconfiguration capabilities within a certain set of configurations limited by the parameter possibilities of each unit. Then, in a second step, it evolved towards a real platform for the support of multi-standard SDR systems. CEA designed a new version of the FAUST chip named MAGALI (Multi-Applications Globally Asynchronous Low-power Integrated circuit), which incorporates new and more diverse processing units. This allows MAGALI to support a wide range of current and future standards, such as WiFi, WiMax, etc. with MIMO support. Note that even if the set of possibilities is very large, it is limited by construction. It is in particular specifically dedicated to radio processing and could not address other domains. It also includes advanced capabilities in terms of power consumption savings, which is a definitely advanced feature in the field of SDR. Even if this seems to contradict the choice of a HW target, Vanu Inc. 
made the choice to stay one hundred percent generic in the sense that the hardware target they selected is “the” generic processor. It is based on MIT research in the mid-1990s [34] for GSM base stations [35]. The main idea is to benefit from the natural technological improvement of GPPs following Moore’s Law and keep the advantage of existing high-level programming tools supported in a GPP environment. The portability is then implicit by essence as it is the exact application of the PC concept to the radio. Vanu Inc. was the first company to be certified for the manufacturing of GSM SDR base stations in the US in 2004. But we may wonder why this technique is not so popular and why Vanu Inc. does not have a stronger market position. This reveals that this is probably not yet mature for the infrastructure. It will consequently be even worse for terminals as the main drawback of GPPs is the power consumption. Let us also cite the tools that are well suited to the optimized design of FPGA IPs and that may be complementary to the previous ones, for example: LISAtek [36] and GAUT [37]. LISAtek aims at providing integrated tools for the design of ASIPs (Application Specific Instruction Set Processors) in order to obtain an optimized hardware processor architecture for a dedicated set of processing operations and the associated programming tools (debugger, compiler). This could be used for the design of an SDR SoC for instance, as experimented in [38]. GAUT makes it possible to reuse an IP algorithmic specification written in C/C++ and to synthesize it into an equivalent VHDL RTL specification. The potentially pipelined architecture which is generated makes it possible to explore the design space for FPGAs or ASICs through a trade-off between processing speed, power consumption and area. This can be a complementary tool for the integration of processing units in previous solutions. 
From the system point of view, another tool worth considering for the design of embedded equipment is CoFluent Studio from CoFluent Design [39]. The tool makes it possible to manually explore a heterogeneous design space in extreme detail in order to anticipate and orient design possibilities. This is a product-oriented approach that requires quite a lot of time and expertise. Moreover, CoFluent does not provide automatic code generation facilities. The exploration stays at the level of a virtual platform. In this paper we mostly favor rapid prototyping oriented solutions that help a signal processing engineer to quickly implement an SDR system with automated steps. The future will tell whether one of those approaches, more or less dedicated to SDR, is the right one or not. Maybe the market of SDR will be shared between several of them. The design methodology proposed in the next section is less dedicated to SDR than most of those described here and could also be of value for other embedded real-time prototyping design perspectives. > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < IV. A REALISTIC DESIGN FLOW FOR SDR OPEN ARCHITECTURES The term realistic is here to explain that with the current state-of-the-art, a perfect solution for fully integrated co-design is not available and not even foreseen. We mean here by open architectures that we consider the solutions based on COTS programmable and reconfigurable components. We exclude from the study the approaches based on pre-oriented hardware, which reduces its range to only a subset of SDR capabilities in terms of flexibility. What is proposed here can be used for any heterogeneous real-time data-flow oriented embedded design. However, the concern of this paper is SDR, so we must keep in mind that SDR is not only data-flow oriented. 
The approach has to be compatible with the addition of a reconfiguration management architecture later, more control-oriented by essence. Moreover, SDR design must not be restricted to radio design. It should be seen at a larger scale, including other layers of the OSI model. SDR design is also a question of cross-layer design. This typically concerns in a terminal, a joint approach for radio and image processing for instance [40]. A. Ideal design flow for SDR Fig. 3 schematically represents what could be the ideal SDR design flow. It would first consist in a high level modelling phase. This would permit to take advantage of mature modelling techniques which permit to the different actors of the design, respectively HW, SW and signal processing designers, to commonly refine the very early design choices in a common environment from the system specifications. First step consists in making a simulation and functional validation of each IP. The hardware target that is planned to run the IPs would be here completely transparent. Ideally, the same language would be used to program a SW (dedicated to run in a processor) or a HW (dedicated to run on a FPGA) IP. The system functional validation is directly obtained by combining the validated IPs of previous step. In parallel to this step, non functional requirements, as well as platform features are derived. Platform has to be considered here as the composition of devices and their associated low level software such as board support package, RTOS, etc.. Then all the information is merged to permit an automatic HW/SW partitioning for heterogeneous multi-processing. Predictions figures are achievable, such as the application execution time, so that other HW/SW matching could be tried if the constraints are not respected. This permits to dimension the HW platform at early stage of the design flow. Let us just note that this is far from current reality. 
The partitioning guarantees the functional accuracy of the multi-processing version of the application compared to the previously simulated mono-processing version. Of course, in that ideal world, the automatic scheduling is also done. The communication code generation is derived directly from the scheduling. Automatic code generation of the IPs is obtained, whatever their nature, HW or SW. The transformation guarantees the equivalence with the previous model so that it avoids repeating the simulation and verification procedure already done earlier. A co-simulation tool makes it possible to definitively validate the heterogeneous system. Finally, the implementation on the platform is straightforward as the ideal design flow takes into account every aspect of the implementation, whatever its level. Fig. 3 – Ideal SDR design flow B. A realistic design flow for SDR A realistic design flow for SDR design is proposed in Fig. 4. It aims to use at their maximum capabilities a set of already existing commercial and/or academic technologies. Two main goals are achieved here. The first goal is to avoid the reprogramming of the same function several times. This is what usually happens for heterogeneous design. No fewer than 6 re-coding steps may be done for certain sub-parts of a heterogeneous system, as for a HW IP: - Matlab functional validation, - floating point C functional validation, - fixed-point C validation, - SystemC, - cycle-accurate SystemC, - VHDL. In addition to the extra time necessary to code the same functionality again, this approach is very error-prone and each > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < step requires repeating the debug and validation process. The second goal consists in automating all the implementation-dependent actions, and keeping the multi-processing functional accuracy from separately developed and validated IPs. The typical abstraction level of IPs is C language or VHDL. 
This could even be an executable code or a bitstream coming from a third party keeping the secrecy of its code. This means that the IPs code for the selected HW target is available or can be obtained with dedicated tools (compiler for DSP and synthesizer, place and route for FPGA), so that non-functional characteristics may be extracted, such as for instance: the program code and data size in a DSP, the number of gates for an FPGA, the execution speed, etc. As previously stated, the IPs design is deliberately kept out of this flow. Note that we also definitely consider here that there is no efficient solution for automatic HW/SW partitioning. This must be left to the designer's choice, based on his experience. Then HW (respectively FPGAs) and SW (respectively processors) worlds have to be treated separately, each of them having its own way of managing multi-processing for automatic partitioning and scheduling. But strong equivalent principles must be applied for both, which makes it possible to keep the global cohesion of the design for an SDR approach. The main one is the asynchronism between operations. It is natural on processors since processing speed is correlated to device clock and not data rhythm. It can be used in FPGAs using a GALS (Globally Asynchronous, Locally Synchronous) design methodology for HW IPs [41]. This gives many advantages which are requirements of SDR for: - re-usability: an IP may be used in different designs at different clock rates, - portability: from one HW device to another, - managing reconfiguration in a future step, - power consumption considerations, which are the origin of GALS. Nevertheless, automatic code generation for communications between the two worlds has to be possible at least. The methodology should also bring some automatic partitioning and scheduling concerning the SW side only. 
It should be able to provide a multi-processing version which is functionally certified equivalent to the mono-processing version of the application used for validation. 8 " Fig. 4 – Realistic SDR design flow today How can this be done? This is explained in section V. C. How supporting reconfiguration in a future step? Reconfigurability is intrinsically provided within this methodology by two complementary means: - the choice of generic hardware components for the platform, such as GPPs, DSPs, FPGAs, - by construction of the software application through a component-based approach for both SW (processors) and HW (FPGAs) processing elements. This is the necessary base that will permit in a further step to integrate a reconfiguration management architecture. We have derived a reconfiguration management architecture that is particularly suited to this design methodology [6][7]. It is out of the scope of this paper to describe it, but we precise that it has been successfully developed and experimented through several prototyping showcases. One main feature of an adequate reconfiguration management architecture is to perform reconfiguration operations in a very short time in order to limit as much as possible the overhead compared to the signal processing duration. It is important to point out that we deliberately disjoined the reconfiguration management design on the one hand and the SDR signal processing on the other hand, while keeping all the necessary interconnections between them. It does not seem mature enough to us to merge both approaches at the same time currently. That is the reason why the design methodology of this paper does not include but supports the coherence with a reconfiguration management > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < architecture. However we are attempting to pursue this final goal, such as for the FPGA sub-part in the Mopcom collaborative project [27][28]. 
In addition, it has been highlighted that a particular effort has been made to completely master each part of the design. We insist that a key point for making reconfiguration efficient is to integrate reconfiguration in the processing elements design itself. From the formal point of view, we have been investigating parameterization techniques for several years. It consists in designing processing elements in such a way that they may be reconfigured fast enough through the use of parameters [42]. An extension is to plan the design of multi-standard equipment with a major use of this approach, up to the use of common operators, as explained in [43][44][45]. From the experimentation point of view, we have also shown that the reconfiguration management may be merged, at least partially, with the signal processing in order to offer more flexibility. That is why we propose a hierarchical and distributed management architecture in [46]. It is particularly obvious in the particular case of partial reconfiguration of FPGAs we have investigated in [7][11]. The transition from one configuration to another can be extracted from the difference between two designs generated rapidly by the methodology proposed in this paper. Some overhead has to be planned for the transition from one design to another in terms of processing time. D. What already existing solutions do and do not do The combination of Simulink with platforms such as LYRtech seems to us the closest solution. But there is an eliminating limit: designing one's own IP is a necessity to control the system in such a sensitive implementation domain. With the Simulink approach, the designer depends on Xilinx IPs to take full advantage of the design flow. It is also possible to add your own IP, but it necessitates coding it twice: for Simulink and in VHDL. Then you break one of the goals, which is to avoid recoding. High level design approaches based on UML offer promising perspectives to formalize the top of the design flow. 
There is a need for a bridge between the specification and the functional validation. The A3S approach and the A3S metamodel [17] partly answered this need, but without generating the result of the architectural study in code for the validation and the implementation. The Mopcom project is trying to fill this gap for the HW side by using automatic transformations between three layers of metamodels [28]. Each of these layers takes into account, with an increasing level of accuracy, the details of the implementation. The goal is to model a system at several levels of abstraction while retaining, at a lower level, the benefit of the validation and results already obtained at a higher level. We do not claim to have explored all the existing solutions, but we have mentioned the most interesting ones. In a nutshell we can draw the following analysis. There are two ways of considering the extension of the design flow towards a fully integrated and automatic design flow for SDR. One way is to start from the electronics point of view and follow a bottom-up path. This is what is chosen by designers who want to keep real-time as a priority, for instance. The other is to look at it from a top-down perspective and favor a software engineering approach to take full benefit from the latest computer science advances and tools. V. A DESIGN METHODOLOGY FOR ULTRA-FAST SDR PROTOTYPING BASED ON SYNDEX We propose here in detail a design methodology for SDR prototyping respecting Fig. 4 and fulfilling the requirements of part II. The set of tools we are using for that purpose is probably not the only possibility. That is why we first presented this methodology in general in section IV.B and present a possible instantiation in the present section. This leaves designers free to choose any other implementation of this methodology, depending on their particular application domain or habits. 
We also recall that the granularity level of the constitutive elements considered in the methodology is that of a signal processing IP, such as a filter, a Viterbi decoder or a FFT for instance. A. SynDEx approach The tool selected for the design flow has so far been SynDEx [47] from INRIA. SynDEx stands for Synchronized Distributed Executives. The role of this tool in the proposed methodology is to carry out the coarse-grain design steps up to code generation, mainly of the glue communication code between IPs. Then specialized tools dedicated to each processing domain (HW/FPGAs and SW/processors respectively) take over up to the implementation. A major requirement of the design flow is that no re-writing of the code is necessary between these two steps, in order to keep the verifications of the former step valid. a) Major features The entries of the design flow are two graphs: - a hardware platform diagram, - a software application diagram. In order to speed up the design phase, it is deliberately chosen to take only a restricted set of parameters into account in the architectural exploration: - execution time of each IP, - atomic communication time for each data type, - features of the communication means. The goal is to obtain an implementation on the HW platform as fast as possible in order to deal with realistic constraints instead of having approximate simulations of these constraints in the design flow itself. Partitioning optimization in SynDEx is performed using graph theory. The SW application graph is modeled by an extended Data Flow Graph (DFG), which is an oriented hypergraph. Each vertex corresponds to an algorithm operation or a processing element that is activated by the fullness of its input buffer, and each edge represents a data transfer between operations. 
An example of SynDEx software application graph > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < IP_1 IP_2 o i IP_3 o i IP_6 o i_1 i_2 Operation i_3 IP_4 i Data dependency between 2 operations o IP_5 i o Fig. 5 – Example of SynDEx software application graph Moreover, the model includes hierarchy, delay, conditioning (if then else) and factorization (for loop) to express the potential parallelism. Factorization, which is associated with a repetition factor, is used to repeat operations and requires additional specific vertices in the DFG: 1) Fork/Join vertices: the “Fork” vertex explodes each element of the data it receives for each parallel repetition of the consumer operation whereas the “Joint” vertex builds the data it sends by concatenating each separated element produced by each repetition of the producer operation. 2) Iterate vertex: this vertex aims to sequentially duplicate a producer/consumer operation where outputs of the current operation can be the input of the next one if data names are similar. More details can be found in [49]. The hardware architecture is modeled by a non-oriented hyper-graph where each vertex is a processor (hardware component) and each hyper-edge represents a communication medium, as shown in Fig. 6. In this model, a processor consists of one operator and as many communicators as there are connected media. An operator executes operations which are a part of the algorithm and a communicator executes a communication operation when a data transfer is required. In this way, multi-component architecture can be represented by a network of Finite State Machines (FSM) interconnected with communication media (FIFO, shared memories etc.). PC1 (lib_archi/pentiumOS) (main) TCP1 PCIsam_0 PCIram_0 TCP1 (lib_archi/TCP) PC2 (lib_archi/pentiumOS) TCP1 PCIsam_0 PCIram_0 Fig. 
6 – Example of SynDEx hardware platform graph The aim of this tool is to find the best matching between an algorithm specifying the application to be run and a multi-component architecture. In addition to this, real-time and embedding constraints must be satisfied. This methodology is based on graph theory to model the software application and the hardware architecture. The software and the hardware are described by distinct graphs. SynDEx transforms those two graphs with graph transformations in order to generate an optimized code implementation. An efficient implementation graph is obtained by performing distribution and scheduling simultaneously, while trying to minimize the execution time. There are a large number of possible implementations. The optimization problem aims to select the most efficient one among them (best latency). The latency is the total execution time of the DFG on a given HW architecture between the first scheduled operation and the last one. Moreover, the problem of distribution and scheduling in the case of a multi-component HW architecture is known to be NP-hard (an exhaustive search over all the possible solutions is inconceivable). This is why heuristics (greedy and genetic algorithms) are used to approximate the optimal solution as closely as possible. The current heuristic attempts to minimize the total execution time of the algorithm running on the multi-component architecture. Moreover, the Synchronized Distributed Executives (SynDEx) are automatically generated and safe. This consequently eliminates part of the tests and low-level hand-coding, and shortens the development lifecycle as well. SynDEx provides a timing graph, as in Fig. 7, which includes simulation results of the distributed application and thus enables SynDEx to be used as a virtual prototyping tool. Time is given in Fig. 5. 10 Fig. 
7 – Example of SynDEx partitioning and scheduling SynDEx is also providing a static scheduling, which is of major importance for hard real-time requirements. It gives the guarantee of an execution within a restricted given latency period. SDR equipment will have very strong real-time constraints to respect. Bringing SW in the radio design must not be followed by customer’s suspicion or even worse rejection. It has already been seen in the mobile phone area with collapse of WAP (Wireless Application Protocol) that a technological advance badly introduced on the market may be very harmful. b) SynDEx tool SynDEx is a cad tool aiming originally at parallelizing the processing of an application on a multi-processor network[51]. It provides a multi-processing version of an application that is functionally accurate as compared to a mono-processor initial version. SynDEx performs an optimized partitioning and scheduling of an application on a multi-processors architecture made of GPPs. Typically communication media is TCP/IP in this context. This can be easily implemented in a platform, instead of a network perspective, by varying communication > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < means between the processors, such as FIFO, dual-port memory, PCI bus, etc.. Moreover, SynDEx has been extended towards the embedded reality and in particular in terms of memory optimization [49] [50]. In this sense, SynDEx particularly answers the SDR issues in the restricted SW domain, in other words if the SDR equipment is only made of processors. SynDEx indeed matches to ideal software radio design. Nevertheless other SynDEx intrinsic features are worth, beyond the purely SW application domain. After the multi-processor matching, SynDEx generates a scheduled version of the application for each of the processors involved in the platform. This code is generic and not dedicated to any implementation. 
It is written M4 macro-code. Each processing node of the graph consequently has at its disposal a generic expression of the scheduling of operations and communication synchronisms that can be translated in any programming language (assembly, C, VHDL, etc.). The translation is operated by GNU M4 macro-translator, thanks to the use of translation libraries, considered as dictionaries. We have been developing translation libraries for many different embedded targets, as shown in TABLE I, added to the already existing ones of TABLE II. TABLE I LIBRARIES DEVELOPPED FOR THE SUPPORT OF MULTI-TARGETS, MULTIPLATFORMS DEVELOPPED BY IETR/INSA Media Type Communication Platforms Processors media Sundance TMS320C64x SDB FIFO/SAM SMT310q (Sundance) Sundance SHB TMS320C62x FIFO/SAM SMT320 (Sundance) Sundance DM642 Comport FIFO/SAM SMT361 (Sundance) x86 (windows) FIFO/RAM Sundance PCI_RAM SMT395 (Sundance) Sundance PCI_SAM FPGA FIFO/SAM SMT365 (Sundance) Sundance SMT TCP (windows, FIFO/SAM 319 C62x, C64x) (Framegrabber) Pentek p4292 Converter ADC FIFO/SAM (Pentek) Vitec Converter DAC FIFO/SAM VP3-PMC (Pentek) Sundance VP3_RAM FIFO/RAM SMT348 (Vitec) FIFO/SAM Bi-FIFO (Pentek) TABLE II ALREADY EXISTING LIBRARIES Processors Intel x86 (linux) ADSP21060 TMS320C40 MC68332 MPC555 Communication media TCP (linux) RS232 CAN Media type FIFO/SAM FIFO/SAM FIFO/SAM Libraries for platforms, processors and communication media are listed in the two tables. Certain libraries depend only upon the language (C, VHDL), others depend on the 11 processor nature, and last on the other devices, typically for communications (FIFO, memory, bus, etc.). Since SynDEx is dedicated to the processor world (called SW in this paper), it extracts parallel operations from the application graph to map them on different processors (hardware components) but makes sequential operations of IPs on each of the processors of the HW graph. 
This consequently does not match the HW (FPGA) reality, which benefits from parallel execution. However, all the code generation and scheduling of the SW world on the one hand, and between the HW and SW worlds on the other hand, can be kept one hundred percent. The content of the HW domain only has to be considered separately, with common principles shared with the SW side. B. Heterogeneous processing support This methodology was originally dedicated to the programming of multi-processor architectures (of any kind: DSP, GPP, µC, etc.). It also efficiently supports devices which are controlled by processors, such as analog or digital ASICs. In order to adapt it to SDR design, it is mandatory to extend it to reconfigurable hardware computing, namely FPGA. HW involvement indeed has to be considered outside the partitioning optimization of SynDEx if we want to keep the HW efficiency (of parallelism), since the processor world is sequential. That is why we mentioned earlier that HW/SW partitioning was not supported by this methodology. The risk of dealing with HW and SW approaches in a single automatic methodology is to decrease the performance of one side to the benefit of the other. Optimization goals may indeed be opposite between HW and SW. It can be considered as a granularity issue between the description of an efficient algorithm written in SW (coarse grain), i.e. in C language, or in HW (fine grain), i.e. in VHDL. We therefore propose a methodology that integrates the HW design into the SW and system design methodology presented so far, while the HW/SW partitioning is done manually. The characteristics that should be kept on the HW side to be fully compatible with the SW side of the design are the following, mainly based on a component-based approach: - HW portability from one design to another, whatever the device, the clock, etc. 
- HW to SW IPs migration, - the support of IPs made of gates as well as IPs running on embedded processor cores inside FPGAs, - the support of ASIC. a) FPGA support A reformulation of the design approach has to be made here. The way to solve the issue of interacting between an intrinsically sequential with a highly parallel approach is in the answer of the following question: How to reformulate the dataflow approach of the sequential world of processors to the parallel world of reconfigurable hardware, without loosing parallel HW highly efficient processing speed? Let us just recall that one major goal in SDR is to add, delete, replace a > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < processing element with no disturbance for the other processing elements of the radio design which are inside or outside the FPGA. The first part of the solution is solved by the IP approach, which guarantees the parallelism inside the IPs since it has been designed with usual HW tools. In other words, SynDEx permits to keep parallelism of execution inside an HW IP with this approach, whereas SynDEx is made to execute sequential operations inside a single processor, and only extract parallelism between several IPs on a multi-processing architecture. The second part of the solution is using GALS, as already addressed in section IV.B. That permits to provide also parallelism through pipeline. GALS permits to generalize to the HW world the asynchronism of IPs processing compared to the data flow rhythm. Input buffers, associated with adequate hand shaking signals of Fig. 7 permit to launch and stop the IPs operation depending on the presence (or not) of data at the input of the IP, or at the place available in the buffers at its output. '*#, '*+ '*+ '*+ '() '() '() & #$% 0 ! / # / ( &! / ./ / * 2 .1 1 - ! 2 .2 2 2 4 ! 3 12 1 .3 / Fig. 
8 – Data flow speed adaptation in a GALS if f1> f2> f3 b) Specific design use cases with FPGA In the specific case of a unique IP running on a FPGA, the methodology is one hundred percent useful and automatic, as there is no sequential issue for one IP. The FPGA is then a HW accelerator. In this particular context, an automatic HW/SW partitioning is even possible as illustrated in Fig. 10. Let us consider the example of the software application in Fig. 9. If it has to be decided whether IP4 should be inside a FPGA or a DSP, the condition is to have both versions of code with their corresponding execution time for both targets. Then the methodology is able to deal with such a context and may optimize and generate the full code for both DSP and FPGA with the scheduling obtained on Fig. 10. IP_1 IP_2 IP_3 IP_6 Fig. 7 –GALS schematic encapsulation GALS has three main interesting features in our methodology, as illustrated in Fig. 8: - adapt the average frequency of operation of an IP to the other IPs frequency of the chain (Fig. 8b for IP A), - this permits to change an IP of clock domain (Fig. 8c), - this permits to block an IP from time to time, as for a reconfiguration operation for instance(Fig. 8d). o i o i o i_1 i_2 i_3 IP_4 i o IP_5 i o Fig. 9 – Software application graph example The methodology takes the decision to instantiate IP_4 on the FPGA instead of the DSP since executing IP_4 in parallel of IP_3 an IP_5 permits to save time: IP_4 execution time and communication overheads are completely masked during IP_5 and IP_3 execution. The FPGA plays here the role of a coprocessor. > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < 13 adjustment of these parameters. Note that in certain context, a DSP may change registers inside a FPGA, from which they configure the ASIC. # "! & #! Fig. 
10 – Fully automatic scheduling obtained with one IP on the FPGA Time A sub-optimal but entirely automatic way of using the proposed methodology is also possible with several IPs on a FPGA. As shown in Fig. 11, it consists in separating a FPGA in as many HW components as there are IPs. If IP3 and IP4 of Fig. 9 are planned to be implemented on a single FPGA, we may define two Virtual FPGAs, assigning IP3 on virtual FPGA1 and IP4 on virtual FPGA2. Fig. 11 shows how the methodology is able to parallelize IP3 and IP4 and take it into account in the global scheduling of the application. Merging all the generated VHDL code for each HW components has just to be done hereafter. Fig. 11 – Scheduling graph with a DSP and one FPGA for each IP c) Digital and analog ASIC support SDR design may not only involve fully programmable or reconfigurable devices, it is necessary to support also specific devices such as ASIC for instance. Let us cite in an SDR design typical ASIC that may be met: digital down and up converters (DDC and DUC), analog to digital and digital to analog converters (ADC and DAC), amplifiers, analog filters, antenna, etc. such as illustrated in Fig. 12. These circuits have to be inserted in the design flow for two reasons. They may be first the destination or the source of data to be processed. Secondly, they may be parameterizable and need to be configured through primitive functions activated by a processor for instance. Consequently the processor has to contain the code necessary to perform the initialization and #! ! ! # #$ % ! ! # " ! # #$ #' # #$ Time # #! # % ! ! # #$ # # ! ##! # # #! Fig. 12 – SynDEx hardware graph with ASIC (DDC, DUC, ADC, DAC) C. A non functional time-based approach The set of non functional characteristics to be taken into account would ideally be as many as possible, but to keep it feasible, it is restricted here to time. This means that each IP has only to be characterized in time in order to be integrated in the design flow. 
Time includes both processing and communication time. Either it is extracted from measurement or it can be estimated. Then SynDEx performs the global timing optimization of the application by mapping the IPs on the hardware architecture. The precision of the global design prediction depends on the accuracy of the timing given for each IP. The partitioning algorithm used in SynDEx is a greedy algorithm, which provides a good compromise between partitioning execution time and result quality. Another approach, based on a genetic algorithm, has also been developed to refine the SynDEx greedy algorithm solution [49]; it provides the same functional results. When the genetic algorithm is used, it further optimizes the cost function and decreases the latency, in some cases by 50%. The price is a computation time that is many thousands of times longer than for a solution obtained with the SynDEx greedy algorithm. Moreover, in addition to time, some memory considerations may be taken into account and considerably reduce the default memory allocation done by SynDEx [49]. D. Platform abstraction This methodology implicitly provides on the one hand an abstraction of the hardware platform to the software processing elements by using already existing IPs. For the communications on the other hand, SynDEx generates a generic code which leaves the designer entirely free to translate it into whatever language they want, as explained in V.A. The automatic partitioning is maximal in the case of multiprocessing on the SW side (processors) using SynDEx, and on the HW side (FPGAs) using dedicated hardware tools such as GAUT for instance. Only the border between HW and SW has to be manually decided, which is the current limit of the HW/SW co-design issue. This abstraction eases portability to new processing units or new platforms. 
It relies on the use of libraries using the appropriate drivers for each platform. An IP can be moved from SW to HW if its code is available > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < for both targets, in C and VHDL for example. It permits design adaptation by a simple click: - migrate a processing element from a processor to a FPGA and vice-versa, - add a processing element in a data-flow graph whether it is on a processor, a FPGA or an ASIC, - change of platform, - migrate a processing element from a gate-oriented implementation to a processor-oriented implementation on a processor core inside a FPGA, - etc. E. Sum-up of methodology principles The proposed methodology is based on the SynDEx tool in association with several concepts, such as the component-based approach. This paragraph aims at recalling its advantages and showing how it meets the requirements of part II for SDR design as well as the realistic design flow exposed in part IV.B. SynDEx provides an overall execution time prediction of the application on a multi-processing platform, with possibilities towards heterogeneous design. At least some contexts of FPGA design are fully integrated in the methodology, and ASICs are also included in the design. SynDEx has the advantage of providing an optimized and static scheduling, which is a guarantee for the hard real-time constraints of SDR. Moreover, the methodology permits the automatic generation of the code of each processing unit of the platform, including IP encapsulation and communication glue. As this code is generic (M4 macro-code), it may be translated into any language, thus enabling the support of heterogeneous processing devices. The re-usability of IPs is intrinsic to the methodology. All this makes it possible to speed up designs at an amazing rate. The reduced set of non-functional parameters is considered as a positive feature in the sense of ultra-fast prototyping. 
SynDEx is a tool that can be used in a couple of days to generate prototypes based on existing previous work. Students can use it for small projects. Only a few weeks are needed to become an expert. It is accessible to student trainees for a period of several months as long as a supporting designer is available in the lab. Section VI demonstrates the efficiency of the approach on many examples of design scenarios. If a hardware platform is at his disposal, a frequent user of this methodology gets into the habit of performing the simulation, test and validation of the application on the platform itself. Note that in the future any other tool providing extended features concerning the presented goals compared to SynDEx would be worth considering. Moreover, this methodology could also be expanded at the top of the design flow, towards higher level design tools based on UML. The input graphs of SynDEx could easily be generated from previous graphs defined in a UML environment. A bridge between A3S graphs and SynDEx graphs for instance would be straightforward. SynDEx would then only take the timing non-functional parameter from A3S. The other non-functional parameters (power consumption, memory and surface considerations) taken into account in A3S would not be used in the mapping process itself but would at least have been considered at the very beginning of the design, which is better than nothing. Of course, the long term goal is to really integrate all non-functional parameters in the complete design flow. VI. DESIGN EXAMPLES (IMPLEMENTATION) Whatever the methodology, the necessary effort to develop an SDR is on the one hand to code an application to simulate and validate the correct functionality of the radio, and on the other hand to code low level software associated with the hardware components in order to prototype the radio on the platform. The lack of methodology means that all this is repeated from scratch for each new design. This is the current situation in the SDR domain. 
The methodology proposed in this paper makes it possible to share the development effort of both: - signal processing IPs for the radio, - low level software of the hardware architecture. This is what makes the methodology fast, in combination with a set of concepts particularly adequate for SDR design and explained earlier. We propose in the following paragraphs to illustrate the efficiency of the methodology in various design scenarios. Note that it will be illustrated that SDR design is tackled in a broad sense here. This also involves cross-layer design, in the sense that the PHY (radio) layer as well as the application (video) layer may be merged with the proposed design approach. This also proves the relevance of the approach for general heterogeneous embedded design outside SDR. A. Starting from scratch Our first attempt at developing an SDR is as simple as a broadcast FM receiver. This is a toy case for a one hundred percent software radio implementation since the antenna is directly connected to the analog to digital converter. The selected platform is a Pentek P4292 made of four TMS320C6203 DSPs. All this processing power is much greater than necessary and is only used to evaluate the methodology in a multi-processing context. The work firstly consists in developing the FM receiver data-flow graph with a component-based approach, and the fixed-point C language code for the content of each IP. This represents from a few days to a couple of weeks of work at most, depending on the previous knowledge of the modulation and the quality of the receiver (stereo or mono, choice of demodulation algorithm, etc.). This can be done on a first hardware platform architecture made of one PC, which is available by default in SynDEx. Secondly, low level software for the HW component support is generated for the Pentek platform and gathered in libraries. 
This concern all the specific primitives that address synchronisations and threads for the C6203, as well as the communications dedicated to the FIFO of the platform. This may take several weeks, but less than 2 months, including M4 > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < learning. It is not at this stage that the methodology is permitting to gain since this preliminary work is always necessary. However, as the design steps are very well identified and separated in the methodology, this is already helpful and permits to save time. The work can also be easily separated between two designers, one signal processing designer for the software application (FM) and one for the platform software support (libraries). Fig. 13 gives a FM receiver application graph which permits to generate with the methodology the full software radio FM receiver. Bottom Fig. 13 represents the hierarchical view of the top FM Demodulation box. . *2 ( ) ( ' 0 3 DSPs with no additional effort. B. Digital ASIC implication Under-sampling techniques are used with a 65 MHz sampling frequency at the ADC. The tuning is managed by software that changes the DDC (Digital Down Converter). These consideration may appear or not in the HW platform graph. The digital ASICs in such architectures, and in the Pentek platform for example, are usually managed by the DSPs. The DSPs contain initialization code to configure the parameters of the ASICs, such as ADC and DDC here, but also DAC (Digital to Analog Converter) and DUC (Digital UpConverter) in the context of a transmitter. The DSP may also change these parameters at run-time. ADC and DDC are represented in a unique bloc in Fig. 14 since the only information the DSP has to know is the media (BIFO_VIM) where it has to send data and control towards the 2 ASICs. The connection between the 2 ASICs is wired after. ( '5 '3 4 " ' 3 '5 3 ' '3 $ 0 $ ( ( '2 0 '3 ' '2 / / ( ) * *2 ( ' ( ) * ' ! 
"! " ' ;; << '2 - ( / / '2, '2, & 2 '2 - ( / ( - ( & 2 / + ( / '2, $( # & 0 , # 0 ! ! Fig. 14 – SynDEx Pentek hardware platform graph graph ' - - ( / / / ' 0 10 '2 ' & ( 5$ '2 ' (* 0 0 10 '2, . 6 6 ( 2 ( - ( / ( * * 8 7$ -9/ - ( / 2 '2 , '2 ( ( ( 2 ( 2 - ( / '2 '2 6 ( ( * * 7$ / ' " - '2 / *2 '2 ' '2 ( ( " ( ( , ,$( . ! ! ( / 15 / / , / 6 ( - / ( * * 8 7$ - -:/ / - / 2 & $( & 2 2 ( '2, ( 2 & 2 , *2 '2 2 '2, & 2 '2, '2, $( Fig. 13 – SynDEx FM receiver application graph The FM receiver shown here is a complex one which operates stereo demodulation. It is the result of the improvement of much simpler previous designs. Only one DSP is largely enough to run the FM receiver in real-time, nevertheless the methodology permits to run it on several C. A new SW application on the previous platform The Pentek platform, with its 4 C6203, is of course supposed to support much more demanding radio applications, such as 3G radio waveforms. We then develop a UMTS FDD baseband chain implementing DPDCH (data) and DPCCH (control) channels till the intermediate frequency (IF). After functional validation in fixed point on a PC environment (Visual C++ or Borland), the application is then ported in the embedded multi-DSP environment of the platform at no cost since libraries were already existing from FM first experiments. This concretely means that once the SW application graph is done in SynDEx and validated on a monoprocessor PC architecture graph, the designer just need to take the hardware platform graph of the Pentek platform previously developed for the FM application, and map in one click the UMTS application on the Pentek multi-DSP platform. The methodology automatically generates from SynDEx and M4 macro-translator the main.c executive file of each DSP, including IPs launching and synchronizations means. The designer may even choose how many DSPs are necessary to run the application in real-time thanks to the overall execution time prediction provided by SynDEx. 
Anyway, it can directly check it on the platform itself in real conditions. SynDEx helps indeed at making this prediction while offering the possibility to automatically insert timing measurement between all IPs. Then from a mono-processing run on each potential device target, measurements give the figures of the execution time of each IP. Once these figures have been entered in SynDEx, as well as communication timings, the tool is able to optimize the > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < mapping for a multi-processor implementation. It provides a timing prediction and the associated code for each processing unit. The designer may explore different architectural possibilities till he is satisfied. Fig. 15 shows the UMTS transmitter application graph within SynDEx environment. Several hierarchical blocks are present in this graph, as well as repetitions. This makes a total of 130 instances deployed on the hardware architecture for the execution of one frame. One UMTS frame is composed of 15 slots, the content of which has been detailed in the bottom half part of Fig. 15. " ( ' . $5 ( ( -# > ! . " ' . . " / ' / ' * ; ! 7 . / ' . . /# / / ' ( . ( little changes could provoke. Let us consider the context of the repartition of the UMTS processing between a GPP and several DSPs. In the Pentek platform environment, this means adding a TCP link between one of the platform DSP and the host PC processor. SynDEx already has the TCP necessary library on the PC side. So just developing the equivalent library on the DSP side is necessary, which only consists in encapsulating the code provided by Pentek in a SynDEx library. This takes one day. Fig. 17 shows the SynDEx graph of the new platform involving the PC and the DSP which permits now to automatically provide the code and run the UMTS application in a new environment made of one Pentium and four TI C6203 processors. PC (PENTIUM) TCP /# . 
Personal Computer ADC_1 (DAC) . VIM . . / VIM_1 (BIFO) / TCP (TCP) / ' ( / . XX YY IO TCP ( (= / '2 . . / / /# $ /# '2 %0 / ''2 / /# $ '2 ' ( '2 '2 '2 , 11? ; DSP_B (TMS320C6203) XX YY IO TCP Bus_4 (BIFO) Bus_2 (BIFO) . / / ' * / ' ( Bus_1 (BIFO) DSP_A (TMS320C6203) ( . 16 ' ' / ' ' $ ' ' ' ' . ' ( (' . . . ( 11? * . ' ( ; 7 (' . DSP_C (TMS320C6203) XX YY IO TCP Bus_3 (BIFO) DSP_D (TMS320C6203) XX YY IO TCP /. ( ' ($ 7 $5 ( ( Fig. 15 – SynDEx UMTS transmitter application graph Fig. 17 – SynDEx Pentek+PC hardware platform graph Fig. 16 shows the Pentek platform HW architecture with four TI C6203 DSPs and an ADC. This gives the opportunity to rapidly evaluate the pertinence (or not) of new design choices at a very low cost, in real conditions. This is exactly the opposite of a manual design methodology where any architecture exploration is very costly. ADC_1 (DAC) VIM VIM_1 (BIFO) Bus_1 (BIFO) DSP_A (TMS320C6203) XX YY IO TCP XX YY IO TCP Bus_4 (BIFO) Bus_2 (BIFO) DSP_C (TMS320C6203) XX YY IO TCP DSP_B (TMS320C6203) Bus_3 (BIFO) DSP_D (TMS320C6203) XX YY IO TCP Fig. 16 – SynDEx Pentek hardware platform graph D. Very fast platform extension We saw in the previous paragraphs that the porting of the UMTS application on the Pentek platform is done at no cost as the SynDEx libraries already existing from the FM case study. It may also be necessary sometimes to rapidly investigate what E. Very fast application extension The extension of the platform towards the general purpose processing side is experimented in a cross-layer design mixing video and radio processing. We consider the following application already discussed in [40]: a combined UMTS FDD / MPEG-4 implementation. This cross-layer implementation benefits from two designs made in two different research contexts but using the same methodology proposed here. 
On the one hand, the SDR design approach we are illustrating in this paper, and on the other hand an image processing perspective to investigate MPEG-4 coding features. These two designs are being done in a completely separate manner in two distinct SynDEx projects. Each of the projects just has to connect to the same TCP socket, on the PC side for the MPEG-4 application, and on the DSP board side for the UMTS radio. Connection is obtained automatically between the two applications and MPEG-4 data are transmitted through a UMTS link. > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < F. Rapid prototyping on a new platform Now the context of the porting of this complex application gathering UMTS and MPEG-4 applications is considered, which constitutes another of the issues to be faced in SDR design. It has been illustrated in [40] in the case of a Pentek platform composed of two C6203 DSPs, and a Sundance platform having two C6416 DSPs. We suggest referring to [40] for more information about the hardware platform description model and the corresponding developed libraries. The schematic of Fig. 18 shows a summary of the porting scenario. UMTS Modulation MPEG-4 Coder FIFO PC TCP PCI BUS FPGAs + DSPs Sundance Pentek Vitec MPEG-4 Decoder UMTS Demodulation FIFO SDB BIFO FPGAs + DSPs Sundance Pentek FIFO TCP PC + DSPs PCI BUS Vitec Fig. 18 – MPEG-4 over UMTS FDD Another platform used in this work is a desktop computer associated with the VP3-pmC multi-DSP board. The PC is based on an Intel Core 2 Duo CPU at 2.2 GHz. The VP3-pmC is a parallel programmable processing platform dedicated to professional video applications like MPEG4-AVC/H.264 real-time encoding and MPEG2 to H.264 trans-coding. 
This platform contains five TMS320DM642 DSPs running at a 720 MHz clock rate, each of them containing an SDRAM of 32 MBytes, and an FPGA hardware co-processor to manage the communications between all DSPs on the platform and the communications with the PCI-Host. All transfers between DSPs are managed by 5 DMA controllers inside the FPGA. This platform has been previously described with the SynDEx tool in [53]. The UMTS application is automatically prototyped on this platform and takes advantage of the methodology: no deadlocks, functional checking of the application portability and automatic multi-processor implementation. Due to the small internal memory, TI cache memory is automatically used as presented in [52]. Timings, given in TABLE III, have proven that increasing the number of DSPs for this application was not worthwhile, since we obtain only a 1.35 acceleration factor from one to two DSPs and then no further acceleration whatever the number of DSPs, up to five. TABLE III TIMINGS OF THE UMTS TRANSMITTER OVER A NEW PLATFORM 1 DSP 2 DSPs 3 DSPs 4 DSPs 5 DSPs 14.3 ms 11.3 ms 11.3 ms 11.3 ms 11 ms Results on this platform are slower than the real-time constraint imposed by the standard, which is 10 ms. This shows that the application graph has to be redefined to extract more potential parallelism so that the hardware platform can perform more parallel execution. G. FPGA implication Another extension of the design space exploration is to combine SW and HW processing, in the sense of processors and FPGAs. This could be the way to solve the real-time issue met in the scenario of the previous paragraph. Thanks to the development of VHDL M4 libraries, the proposed methodology makes it possible to generate VHDL as easily as C language code from SynDEx. The only limitation has been exposed earlier: SynDEx is not appropriate to automatically optimize the scheduling and partitioning of multiple HW IPs since it is based on a sequential processing model between IPs. 
But all the other features of the methodology are valid, such as automatic code generation of communications and synchronization means. And in the particular case of only one IP inside an FPGA, the restriction does not apply and the FPGA is used as a hardware accelerator. The most demanding IP of the UMTS FDD baseband transmitter, in terms of processing power, is the pulse shaping filter. We propose to map the UMTS receiver in the hardware platform of Fig. 19 to accelerate the receiver execution. PC (PENTIUM) TCP Personal Computer ADC_1 (DAC) VIM VIM_1 (BIFO) XX YY IO TCP VIM VIM_1 (BIFO) TCP (TCP) Bus_1 (BIFO) DSP_A (TMS320C6203) FPGA_1 (fpga) DSP_B (TMS320C6203) XX YY IO TCP Bus_4 (BIFO) Bus_2 (BIFO) DSP_C (TMS320C6203) Bus_3 (BIFO) DSP_D (TMS320C6203) XX YY IO TCP XX YY IO TCP Fig. 19 – SynDEx Sundance+FPGA hardware platform graph Results of TABLE IV show the timings obtained after the implementation without and with the FPGA. The FPGA is used as an accelerator and makes it possible to meet the real-time constraint, since a UMTS FDD frame has to be generated every 10 ms and the combination of a Xilinx Virtex2 FPGA and a TI C62x DSP reduces the frame execution time to 9.9 ms. TABLE IV TIMINGS OF THE UMTS TRANSMITTER WITHOUT AND WITH FPGA ON THE SUNDANCE PLATFORM Target Sundance Pentek Configuration 1*C64x 1*C62x Time/frame MFL ratio 15.9ms 60% 20.2ms 84% 1*XC2V + 1*C62x 9.9ms 32% > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < H. MPEG-4 Decoder rapid prototyping An MPEG-4 decoder description has been built for intra pictures in [54] according to video MPEG-4 texture decoding libraries, whose operation granularity matches the level of IDCT, VLC and dequantization within a block. Description granularity has a significant impact on the final implementation. The MPEG-4 decoder in [54] is extended in [50] to Simple Profile. 
MPEG-4 natural texture coding tools divide intra or predictive pictures into macroblocks, composed of four 8x8 blocks of the luminance channel, and the associated 8x8 blocks of the chromatic components. Each macroblock (MB) operation has been optimized for a DSP implementation: interleaving loops, conditional tests leading to pipeline rupture, no dynamic allocations. Our implementation is open source; it started from the xvid decoder (http://www.xvid.org) and received the agreement of the xvid team to be the first port to a DSP. Thanks to the methodology developed here, we would like to emphasize how fast an application can be ported from one platform to another. In this case, this MPEG-4 decoder has been quickly ported over several platforms presented earlier in this paper: VITEC, Pentek and Sundance. It can decode in real-time sequences up to 2048 Mbps with a VGA resolution at 60 frames per second on the TI C6416 DSP at 1 GHz. The dataflow application development phase, in atomic operations enabling parallelism expression, took around one person-year of work on the Sundance platform. Then the porting over the two other platforms is almost instantaneous (less than one hour) as soon as the libraries of TABLE I in V.A.b) are available. This proves the efficiency of the design methodology proposed in this paper. VII. CONCLUSION We have discussed and tried to convince the reader about the advantages of the proposed methodology for SDR prototyping. This methodology has been derived from years of experience in terms of experimentations and now makes it possible to produce prototypes within short delays. It can also be used by students because of its simplicity. We are convinced that SDR design will not be solved by a brand new design methodology. Moreover, SDR design perspectives are as numerous as radio design perspectives currently are. The main point to be stressed in this methodology is the gathering within a single framework of several concepts and tools that fit together. 
Concrete limits are defined and clear objectives are targeted. A component-based approach is the basis of the methodology, and combined with efficient and realistic automatic transformations, it makes it possible to propose a heterogeneous design strategy for SDR prototyping, and at a larger scale the design of any embedded equipment. Other tools may be chosen with respect to the rules given in Fig. 4. Moreover, other tools will have to be added to the proposed flow in order to enhance its current capabilities. In particular, we are investigating solutions based on higher level design techniques using UML modelling. Benefits from automatic co-design advances in general will help as well. A future step of the design solution we propose consists in merging the design methodology exposed in this paper with the reconfiguration management that we are investigating in other studies. This will constitute the ultimate level for SDR design we are aiming to attain. REFERENCES [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] [18] [19] [20] J. Mitola, “The software Radio Architecture,” IEEE Communications Magazine, May 1995, pp. 26-38. J.H. Reed, B. D. Woerner, “Software Radio: A Modern Approach to Radio Engineering,” Prentice Hall, 2002 W.H. Tuttlebee, “Software Defined Radio: Origins, Drivers and International Perspectives,” John Wiley & Sons, 2002. A. A. Kountouris, C. Moy, L. Rambaud, “Reconfigurability: A Key Property in Software Radio Systems,” in Proc. First Karlsruhe Workshop on Software Radios, Karlsruhe, Germany, 29-30 Mar. 2000. P. Demestichas, G. Vivier, K. El-Khazem, M. Theologou, “Evolution in Wireless Systems Management Concepts: from Composite Radio Environments to Reconfigurability,” IEEE Communication Magazine, May 2004. A. A. Kountouris, C. Moy, “Reconfiguration in Software Radio Systems”, in Proc. Second Karlsruhe Workshop on Software Radios, Karlsruhe, Germany, Mar. 2002. J.P. Delahaye, C. Moy, P. Leray, J. 
Palicot, “Managing Dynamic Partial Reconfiguration on Heterogeneous SDR Platforms”, in Proc. SDR Forum Technical Conference’05, Anaheim, USA, 15-17 Nov. 2005. X. Revés, A. Gelonch, V. Marojevic, R. Ferrús, “Software radios: Unifying the reconfiguration process over heterogeneous platforms,” in EURASIP Journal on Applied Signal Processing, vol. 2005, issue 16, September 2005. S. Paquelet, C. Moy, L-M.Aubert, "RF Front-End Considerations for SDR Ultra-Wideband Communications Systems,” RF Design, Jul. 2004. C. R. Anderson, “A Software Defined Ultra Wideband Transceiver Testbed for Communications, Ranging, or Imaging,” PhD dissertation at Virginia Tech, 2007. J.-P. Delahaye, C. Moy, P.Leray, J. Palicot, “Partial Reconfiguration of FPGAs for Dynamical Reconfiguration of a Software Radio Platform,” in Proc. of IST Mobile and Wireless Communications Summit, Budapest, Hungary, June 2007. G. Gogniat, T. Wolf, W. Burleson, J-P. Diguet, L. Bossuet, R. Vaslin, “Reconfigurable Hardware for High-Security/ High-Performance Embedded Systems: The SAFES Perspective,” in IEEE Transactions on Very Large Scale Integration (VLSI) Systems, Volume 16, Issue 2, Feb. 2008, Page(s):144 – 155 V. Rana; M. D. Santambrogio; D. Sciuto, “Dynamic Reconfigurability in Embedded System Design,” ISCAS’07, IEEE International Symposium on Circuits and Systems, p.2734-2737, May 2007 J. Bier, “Use a Microprocessor, a DSP or both?,” in Proc. Embedded Systems Conference, April 4, 2007 C. Szyperski, Component Software, Beyond Object-Oriented Programming, Addison-Wesley, 1998. www.omgmarte.org C. Moy, M. Raulet, S. Rouxel, J.P. Diguet, G. Gogniat, P. Desfray, N. Bulteau, J.E. Goubard, Y. Denef, “UML Profiles for Waveform Signal Processing Systems Abstraction,” in Proc. of SDR Forum Technical Conference, Phoenix, USA, Nov. 2004 J. Laurent, E. Senn, N. Julien, E. Martin, “Functional Level Power Analysis: An Efficient Approach for Modeling the Power Consumption of Complex Processors,” in Proc. 
DATE’04, Paris, France, 2004. D. Elléouet, Y. Savary, N. Julien, “An FPGA Power Aware Design Flow,” Lecture Notes in Computer Science, Springer, vol. 4148, 2006. S. Rouxel, J.P. Diguet, N. Bulteau, J. Carre-Gourdin, J.E. Goubard, C. Moy, “UML Framework for PIM and PSM Verification of SDR Systems,” in Proc. of SDR Forum Technical Conference, Anaheim (USA), Nov. 2005 > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < [21] “Joint Tactical Radio System (JTRS) Standard Modem Hardware Abstraction Layer Application Program Interface,” version 2.11.1, 02 MAY 2007 [22] G. Gaillard, E. Nicollet, M. Sarlotte, F. Verdier, “Transaction Level Modelling of SCA compliant Software Defined Radio Waveforms and Platforms PIM/PSM,” in Proc. of DATE’07, 2007 [23] C. Lucarz, M. Mattavelli, J. Thomas-Kerr and J. Janneck, “Reconfigurable media coding: a new specification model for multimedia coders,” Proceedings of the IEEE Workshop on Signal Processing Systems, 2007. Pages: 481 - 486. [24] S. Bernier, “SCA Reference Implementation,” http://www.crc.ca/scari/ [25] “Open SystemC Initiative, SystemC v2.0.1,” http://www.systemc.org [26] F Charot, M Nyamsi, P Quinton, C Wagner, “Architecture Exploration for 3G Telephony Applications Using a Hardware-Software Prototyping Platform,” Proc. of Computer Systems: Architectures, Modeling and Simulation, 2003, Samos, Greece. [27] A. Koudri, D. Aulagnier, D. Vojtisek, P. Soulard, C. Moy, J. Champeau, J. Vidal, S. Lecomte, “RTL Code Generation from MARTE Models,” in Proc. of Modeling and Analysis of Real-Time and Embedded Systems with the MARTE UML profile at DATE’08, Munchen, Germany, 10-14 March 2008. [28] http://www.mopcom.fr [29] A. G. Kleppe, J.B. Warmer, W. Bast, “MDA Explained - The Model Driven Architecture: Practice and Promise,” Addison-Wesley, 2003 [30] S. Rouxel, G. Gogniat, J.P. Diguet, J.L. Philippe, C. 
Moy, “From MDD Concepts To Experiments And Illustrations,” in Schedulability Analysis and MDD, J.P. Babau, J. Champeau, S. Gérard, Ed. International Scientific and Technical Encyclopedia, September 2006, pp. 111-130 [31] B. Plunkett, J. Watson, “Adapt2400 ACM Architecture Overview,” QuickSilver Whitepaper, 2004 [32] R. Rabineau, D. Lattard, Y. Durand, M. Lobeira and JP Rossi, “Flexible Test-Bed for B3G Systems,” in Proc. of IST Mobile and Wireless Communications Summit, Mykonos, Greece, June 2006. [33] Y. Durand, C. Bernard, and D. Lattard, “FAUST: On-chip distributed architecture for a 4G baseband modem SoC”, in Design & Reuse IPSoC, Grenoble, France, Dec. 2005 [34] V. Bose, “Design and Implementation of Software Radio Using a General Purpose Processor,” Ph.D. thesis at MIT, June 1999 [35] T. Turletti, D. Tennenhouse, “Complexity of a Software GSM Base Station,” in IEEE Communications Magazine, February 1999. [36] A. Hoffmann, H. Meyr, R. Leupers, “Architecture Exploration for Embedded Processors with LISA,” in Kluwer Academic Publishers ISBN 1-4020-7338-0, Dec 2002. [37] P. Coussy, C. Chavet, P. Bomel, D. Heller, E. Senn, E. Martin, “GAUT: A High-Level Synthesis Tool for DSP applications,” in High-Level Synthesis: From Algorithm to Digital Circuit, Springer, to appear, 2008 [38] D. Kammler, L. Godard, I. Gomez, “Second Report on ASIP Design Methodologies,” NEWCOM report DR4.4, February 2007. [39] J-P. Calvez, V. Perrier, “SOC Architecting and Design with CoFluent Studio, Concepts and Methodology - Part I,” available at: http://www.cofluent.com [40] M. Raulet, F. Urban, J.F. Nezan, C. Moy, O. Deforges, Y. Sorel, “Rapid Prototyping for Heterogeneous Multicomponent Systems: an MPEG-4 Stream Over an UMTS Communication Link,” in special issue on Design Methods for DSP Systems of Eurasip Journal on Applied Signal Processing, Kluwer Academic Publishers, vol. 2006, pp. 1-13 [41] J. Muttersbach, T. Villiger, H. Kaeslin, N. Felber, W. 
Fichtner, “Globally-Asynchronous Locally-Synchronous Architectures to Simplify the Design of On-Chip Systems,” in Proc. of 12th IEEE International ASIC/SOC Conference, September 1999, p. 317-321 [42] A.R. Rhiemeier, “Benefits and Limits of Parameterized Channel Coding for Software Radio,” in Proc. of 2nd Karlsruhe Workshop on Software Radios, Germany, March 2002. [43] J. Palicot and C. Roland, “FFT: a basic function for a reconfigurable receiver,” in Telecommunications, 10th International Conference on, vol. 1, pp. 898–902, 2003. [44] C. Moy, J. Palicot, V. Rodriguez, and D. Giri, “Optimal determination of common operators for multi-standards software-defined radio,” in Proc. of 4th Karlsruhe Workshop on Software Radios, (Karlsruhe, Germany), March 2006. [45] V. Rodriguez, C. Moy, J. Palicot, “Install or invoke?: The optimal tradeoff between performance and cost in the design of multi-standard [46] [47] [48] [49] [50] [51] [52] [53] [54] 19 reconfigurable radios,” Wiley InterScience, Wireless Communications and Mobile Computing Journal, Special Issue on Cognitive Radio, Software Defined Radio And Adaptive Wireless Systems, vol 7, issue 9, Pages 1143 - 1156 J-P. Delahaye, “Plate-Forme Hétérogène Reconfigurable : Apllication à la Radio Logicielle,”Ph.D. thesis, Université de Rennes 1, 18 April 2007 http://www-rocq.inria.fr/syndex/ T. Grandpierre and Y. Sorel, “From algorithm and architecture specifications to automatic generation of distributed realtime executives: a seamless flow of graphs transformations,” in Proceedings of 1st ACM and IEEE International Conference on Formal Methods and Models for Co-Design (MEMOCODE ’03), pp. 123–132, Mont Saint-Michel, France, June 2003. M. Raulet, M. Babel, J.-F. Nezan, O. Deforges, and Y.Sorel, “Automatic Coarse Grain Partitioning and Automatic Code Generation for Heterogeneous Architectures,” in Proc. SIPS 2003, Seoul, Korea, August 27-29. 2003. M. 
Raulet, “Optimisations Mémoire dans la méthodologie «adéquation Algorithme Architecture» pour Code Embarqué sur Architectures Parallèles,” PhD thesis, INSA of Rennes, France T. Grandpierre, C. Lavarenne, and Y. Sorel, “Optimized rapid prototyping for real-time embedded heterogeneous multiprocessors,” in Proceedings of 7th International Workshop on Hardware/Software Codesign (CODES ’99), pp. 74–78, Rome, Italy, May 1999 F. Urban, M. Raulet, J.-F. Nezan, and O. Déforges, “Automatic DSP cache memory management and fast prototyping for multiprocessor image applications,” in 14th European Signal Processing Conference, Eusipco’06, Florence, Italy, September 2006. A. Maccari, J.F. Nezan, F. Urban, M. Raulet, “Interconnected distributed RAM in SynDEx,” in Workshop on Design and architectures for Signal and Image Processing, DASIP 2007, Grenoble, France. J.-F. Nezan, O. Deforges, and M. Raulet, “Fast prototyping methodology for distribued and heterogeneous architectures: application to Mpeg-4 video tools,” Design Automation for Embedded Systems, 2005. Christophe Moy was born in 1972 in La Roche sur Yon, France. He is an engineer of the INSA (National Institute of Applied Sciences), Rennes, France, 1995. He received his M.Sc. and Ph.D. degrees in Electronics in 1995 and 1999 from the INSA. He worked from 1995 to 1999 on spread spectrum and RAKE receivers for the IETR (Institute on Electronics and Telecommunications of Rennes). He then worked 6 years at Mitsubishi Electric ITE-TCL research lab where he was focusing on Software Radio systems and concepts, including digital signal processing, HW and SW architecture, codesign methodology and reconfiguration. He also addressed the design of SDR impulse UWB systems. He represented Mitsubishi Electric at the SDR Forum and worked on the French research program A3S (design of SDR through a Model Driven Architecture UML-based methodology), as well as the IST project E²R phase 1. 
He is now an Assistant Professor at SUPELEC, the oldest French engineering institute on Electronics. His research is done in the SCEE lab of the IETR, which focuses on Software and Cognitive Radio. He addresses heterogeneous design techniques for SDR, as well as cross-layer optimization topics. He participates to the IST Network of Excellence NEWCOM++, in the flexible radio work package. He is also involved in E²R phase 2 and several French RNRT project on SDR called Idromel and Mopcom.. Mickaël Raulet was born in 1977 in Saint-Brieuc, France. He received his postgraduate certificate in signal, telecommunications, images, and radar sciences from Rennes University in 2002, and his Engineering degree in electronic and computer engineering from National Institute of Applied Sciences (INSA), Rennes Scientific and Technical University. Next in 2006, he received a Ph.D. degree from INSA in electronic and signal processing in collaboration with the > EURASIP Journal on Embedded Systems – Special issue on Design and Architectures for Signal and Image Processing < software radio team of Mitsubishi Electric ITE (Rennes –France). He is currently in the Institute of Electronics and Telecommunications of Rennes (IETR) where he is a research engineer in rapid prototyping of video standard compression algorithms on embedded architectures (multi-DSP architecture) and he is the Image Group project leader of French Media and Network project such as Scalim@ges (MPEG-4 SVC main topic) and Mobim@ges (DVB-H main topic). Since 2007, he is involved in the ISO/IEC JTC1/SC29/WG11 standardization activities (better known as MPEG) such as a Reconfigurable Video Coding Expert. His interests include video standard compression and telecommunication algorithms and rapid prototyping on multi-DSP architectures from Texas Instruments. 20 '- $ "#0 - 504 - .$ ,$ # #$ 03 1$ $ ! 1 " '- '- " 0 ( # A , 9 " , '< 0 '#0. ? ;B +0 ; # " 0 5 $ 0 (1#/#"("# / +< +" 1 ! ( 3# #) 3 +0 +0 ! " #" $ &'% % () * "+ , -! 
" - #" $ -% . / - ! ! "# ! ! ! ! $ ! % ! & '# ( ' # ( '# ) # )# $ '# ( '# $ ( ( $ '# ( ( ! * +(, $ * . ! ! $ * 0 0 #1 0 0 # " 0 2 0 " " 0 # 0 0 34 0 # # 5 & 6 3 74 3 84 3 94 - # 3 :4 - ; #< # # 5 + 6 ? " = # # # '& " & 5& 6 5 &&6 0 : @: " - * AB ! # & " 5 0 # ) # # # 6 " " 5!6 & # # 0 + 0 - A0 0 " # # 0 0 =- ) 3 >4 " - 0 " # # # - * 0# - ) 0 - * & 5 = 0 - " 0 = # 3 74 3 94 - ) 6 5 0 6 & # # # D - ) C " 5 !%% 6" # 5 +%! 6" - * 5 &% 6" 0 E -F # E " G - * - H 5 6 0 =3 I4 - ) # # 0 - ) 0 = 0 - E " # # E 0 " # - * J 0 # & - " 34 - # # = 0 - # 1 " # " - * #" " " 0 E # # -) - * 0 " 1 - " # = 0 " 0 # -* 0 # 0 =" 0 " * ? 0 -H 7" - & E " E - & 0 E E - + # # 0 " 0 # 0 0 " - * 9 -) 0 #" :- * 8 " # " #- 7 <0 " E # = 34 3 @4 - 0 = # # 5 # # - + ? # & 6 0 #- 56 + 34 + 56 " 0 # 1 • 1 • " 1 = • " " 1 " 0 & - ! ) " " 0 # $ # " 0 " = -* 0 + % $ & - 0 # # = " 7- &% ;&) # 0 0 ' # # # ;&) 1 " " 0 " 0 0 # " " " " - * 0 = = 8 # # 0 -) #- * # # 0 " -) " # #" " - ) 0 " 5 K = # # " 0 " # " 0 = # 6 " " - * E 0 " E " #" - 0 - H = 3 L4 " 0 " ? = " 5 # 6 # < 0 # ( -H = 0 #- K - * " # !%& " # - 0 = E = 0 0# = 3M4 A 0 • 0 • E • # # 0 1 34 " # = E= 0 # 374 " 384 - # E # ? - * 0 " " # 3 :4 394 3:4 - ; - + # 5- - 6 " # " *K # # 3>4 " 0 #- * # E E E - # - H # 0 # E E #1 0 #" # # " " " -F - 9 () $ H 0 * + 80 - # smart sub-system analysis stimuli decision learning user SDR communication sub-system network environment Application layer sensing means Multiple physical layers Multiple Multiplephysical physicallayers layers hardware * & # E # adapting means … electro-magnetic environment * orders ) $ 5 + 6 = 5 6 0 5 # # 0 6 - G 0 - + " E # = & " 0 1 -& - + + 0 # E -* 0 :- * = #- * # # " + 0 − # − − 0 − E 1 + #" & # " 0 5 !& #" " =" " " -6" E " +%! " ' *&" H +" -6" #N " 5 # 0 5 &A 6" " 1 1 * A 81 - = " - - 0 " =" = 1 " 0 5 " 6" - # = 3I4 - 0 0 #" 0 - * 0 0 G = 0 = 0 0 =E ? = ? 
-; # = - 0 " = - H 0 - H " 0 # # 0 - #0 #" 0 =E : E - ) # 0 =E , # & #" ) $ " -* E 0 E -+ #- ) 0 E 6 " H+ H " 0 " F # " &# % A KFE 6- * * # 'HF 3L4 #0 0 # 10 5 E 0 #" 0 # E 5 0 6" # - & 62H -F E : " " O &" " -& # & E # 0 - A 0 " 0 # -* #0 " # 0 # 6 0 0 " : & -+ + " 3@4 " # 5 " F 0 E = # A * # # E ' *& 5 ! )*& 5 ) 0 # !& " !%& * 0 - * 0 # # " # ( - * 0 & - H # 0 0 " 0 3 7M4 - 0 # 0 ./ # # 6 # # # 5 G #" # -* # - 5 # # ;&) #- 3 I4 6- * -; 76- # 0 6- * -F " - * # # # - & # # 0 = 3 I40 5 # ./ 0 + " ? * 5 # -* & " - 0 0 # " -; 0 0 0 # E # G # 6 # # 0 ;&) 1 # 5 % P # - > * # " 0 = 0 0 " - 0 9" 0 3 7 4 - + " E 0 = " # # -* + 0 - E 0 # = E ? - = E " - # E , E #- 0/ Li_ReMU 5 Li_CRMU 6 , , - 0 */ 1 2 2 = # " - # # # " # - " * # - # 0 # # " = " 0 #- * #" 0 #- Q 0 " 0 - - ) 0 Q # # #- H 0 7 # E 7Q " - 7Q 0 ' 8Q " " # 0 '- # &% +%! - * 8Q ' - # 8" # 8Q ' 6 7 8 " # 5 = ' # # - ) ? # = • + :- " 0 E Q 0 # - * = #" # # - Q " Q 5 E 0 = # " % 6" " # I 0 7Q '- ) • 7Q ' 8Q 1 56 ' = #N 5 6 # # # -) # # 0 # 7Q ( 5 7Q - *0 Q '- 7Q HE ' 6 # Q - • 8Q ' # - 8Q ' 1 56 8Q 8Q ' # # 0 # 0 # = 0 ' " " 0 N5 6 7Q & 7Q '" = - 8Q Cognitive Management ' " ' - Reconfiguration Management Level 1 Cognitive Manager L1_CRM Reconfig. Manager L1_ReM Standard Set Level 2 L2_CRMU L2_ReMU … L2_CRMU L2_ReMU … Level 3 L3_CRMU L3_ReMU L3_CRMU L3_ReMU … reconfiguration sensing orders information data from previous operator operator operator 3 % ' $ data to next operator ./ " $ " ) # = - & # -) = # = 0 + # # 0 Q # - * 0 # - * ? 3 774 - ) " # = 0 " " 0 8 = - @ 3 H # # = N56 0 -H 0N5 6 E N5 6 0 N5 6 E 0 N5 - * - ) " # 0 " 6 E =# 0 " " " " " - # 0 -+ 0 # ; G E; ' 5 ' # - * 6 3 784 - ' # # 0 - * ' #- F ' # - ) = # G # 5 " - + = & 6 > -* E ' - * 0 # -* - * " G - * - F 0 - * # 0 # G # G E = R # E " 0 0 - 5 ' 5 R 0 # ./ 6 ./ 6 5 A + ./ & ./ &# 4 # K % ( ./ - L * ./ 4 * * 4 < E 2 + # I 0 6 # + - H : 0 E # 0 E # 0 " ? #- 6 * #5 4 ? 
./ 0 # # # # - ) = " -* 0 " 5 ; ; 6- # < ( * + I % $ 0 - 5 ; 6 # E -* - * = 0 G 6- # 5 =# 5 # - ) 6 G G * 0 Q -* Q # Q # Q Q - " H 0 0 0 = 0 - " M 0 0 8Q ' 8Q ' -* " # H - ) 8" + @ 8Q ' 8Q ' # 7 0 0 8 * $ 0 - * - # # - 7 ./ $ #1 • 0 = • " • 5 ) " ) 0 5 '6 ) " 0 '66 0 " • 0 • = " 0 " # 0 " " 5 '6 5 '6- #" - 5- - " 7 86 -+ " • 1 01 ST 7Q '56" • 71 ST 8Q '56" • 81 ST; - ? " • 1 # • " " • " • - * 8Q 5 E 5 Q 6 7Q # 0 # 0 6" 0 # 8Q &%- '56 5 # '566 0 5 8Q " ' " - 8Q 'Q 7Q ' - &% +%! -6 0 # # 0 0 5 L" 0 '" 7Q 0 0 0 E ' 6 7Q ' ? +%! " + = = - H 8Q ' " " 8Q 'Q # # 8Q 7Q 8Q - H 'Q " '- H 'Q 0 7Q # ' 8Q = 8 ** 0 'QF 0 8Q 'QF- &$ 4 F $ - ) E " 0 # 0 # 0 5 # # 5 = = # 0 0 0 " = -& - * 0 6 0 - * # # " = -+ 9 6 ./ M # 0 - $ #1 • • 0 = ) " ) ) " " 7 • 5 0 5 '6 0 '66 0 " • 0 • = " " " " 0 #" - = = - H " " 0 " 0 0 " - ) = #? - 0 = # * * " =# 0 " #? = -* # - * # 0 " 0 " + $- # - O # # 2) + 0 # " # " 0 " #- & " # 0 E - + # # # = 0 # # < " 0 - * < -* " - * 0 # 1 Cognitive Management 1 1..* Capture Metric Metric 1..* Evaluate Evaluate Metric Analyse Metric 1..* Analyse 1..* has 1..* Action Define Action Priority Prioritize Action Define Metric Strategy Action List of Action 1 1 " * 0 1 • 1 • #? 1 # - * 0 5 '6 • 1 " -F " 8 • % ? 1 5 " # • & # 6 1* # 0 6 5 " 5 '6" # 0 * " # # # # - # " - * " • 0 #1 " • " • 0 • =" ? 5 * " -6- " # = -* * * :; ) " 0 E #? - H " 0 H 0 " # =E 0 0 - E # - ) 0 =E " 0 = # Q - ) = 0 ) - " # 0 E - * = #" = #" - " C = D # " 0 Q 0 0 # - ) = # " "* ( 0 Q # - % " - " " # # " # - + - ) # # " =E 0 " 0 0 0 - " - 0 0 " 7 0 =5 # " -6 # = # 0 - < 9 ; # ? - * 0 0 # # - # # " 0 # " -* 1 = # Q " " # " - # " #- & " # # # - % 0 # 1 " # 8 0 !%& - *+ 8 &$ 0 E ; - & #" ; = - * ; # * &$ 8Q ' 8Q '- E = +%! 
&% #" 0 " " -* # - + 0 = H - *3 ' 4 * G E #" ;G + # #6 3 794 #- G E ;+5 E; G 0 # E E + ;+5 E 6 E& E 5 & 6- 0 " # E - R 3 7:4 # # )A ) # E - H R 0 0 -* # = E & 0 - * # " 0 -) " "0 - ) : 0 # #- * " " = # # - * 0 # # # 5 6 # = #0 - # H - ./ - R # +3 7>45 0 5 = + E 7= "= 7 " =6" 0 E "= 7 0 7 ( " 7= " B ) 7 R ( E - * # * 5 ' 7- 6 0" + + 6- + I" 0 9" - - * ( # 2 0 & ./ R E - * 7= # - E # E -) " 5 B ) 6 R - * E # -* # # " - * # " # " 5 0 E " " = " 6- + : 0 + 3 774 - " - ) # B ) 6- ) Q I" # # = # B ) 5 1 0 Q > - * + 0 # 0 E ; # - * & # 0 #" # - F# 0 " " " " " ( # 0 " -* -+ # = # 3 < ./ - 4 -* # " 0 " 5 " -) 6 " = # " O &" 0 < I # # " H " -) 0 " " " # 60 -* # 0 5 # 0 - H " " - H 0 0 = - =# E * 0 # 0 0 = 0 # " # " # # " " - E - * # - " 0 " " # &# 6- H " # 5 # 5 ' = - * 6 # 0 = " - * - ) 0 # A 0 E # -) # 0 0 -) # H "0 0 " 0 =0 " - 3 = % 3 % % * 2 0 -* - # -* # # - * " # 5 # E 6" 0 H # 3 7I4 -+ # # # - * 0 0 # 0 0 ? -H @ 3@4 * - F & 0 1 7Q '( 7Q # ' " 8Q '( 8Q - * ' 0 #? : L3_ReMU : L1_ReM : L1_CRM : L2_ReMU func_fusion : L2_CRMU func_fusion : L3_CRMU BWA BWA 0 :L3_ReMU BWR Input signal Operator BWA :L3_CRMU BWR + Operator BWR >FH 1 F FH 1 F # 1 # HK*1 H 1 :L3_CRMU :L3_ReMU Cyclo WVT :L3_ReMU Cyclo " 0 0 # K :L3_CRMU WVT :L3_ReMU Hspec Operator WVT Operator Cyclo :L3_CRMU Hspec Operator Hspec Signal to analyse Searching module Analyzing module 5 = * 0 F - & " 1 H " FH 1 F * # " H 5 FH 1 # 6 1 # #" HK*1 H 5 6 5 - H C " -* - ; # - " # " 0 #- " 8Q > '- # 7Q # + " # FH " ? # ' 7 D " 0 8Q EK +%! 6 ' 7Q ? '- 7Q ' " HK* " - * Q 0 0 E 3@4 ( -H 0 # - * -% + # 0 - * # + > > 0 = # 0 ( =- ) " # " " 0 # * 0 -* 0 # * #? #? #? # 0 - # 0 - - & #? 3@4 1 L • 0 1 0 • # • H " #1 0 EK E 1 0 + • * # % 0 # - ' 0 0 0 - ) 5 - ) #- + " &" 1 3 # " " " 0 ? " -6" " 0 = " I 0 0 Q 0 " Q " 7Q '((+ Q 7Q '((+ Q = Q -H = 0 5 0 0 " # 0 " " # -H 7Q 8Q Q # -* + >6- * '" - H Q Q '((+ Q Q 0 # = " -* -H #" - 6 = ) 7M 3* < " 0 0 #? + @- * -* # 6% 6 0 7 6% # 7Q 8Q '( ( '( ( 0 = Q Q $ H - 2 # #? - ) 0 " #? #" # " #? 
0 + # L- 8 * " " -* E " # " 0 # =- + #" 0 # 1 " " # -) 5 7Q " ' " 0 - * " 6 7Q Q '- " Q Q " " 8Q = '(( " 0 ? " -* #" # - * # 0 ) - + 0 # - * - 7 * 0 0 0 0 # 0 - ; + = 0 0 " 7Q ' 8Q @56 MMM= ? @56 0 8Q -; + '(( Q 0 - Q) 8Q - H # 7Q Q ' - * - MMMQ+ 5MMM # # + + # F " 60 # 0 7M- 9 > * $ :; Q = - * ) " = # Q 0 -; 0 # - Q = - H " 0 " 0 # Q -) " 7Q " ' 8Q '- 3H # # - # # - * 0 - -* #" # # 0 -* " 0 # # 0- " " 0 # = 0 # # 0 - ) -* 0 # # 0 # # # 0 " # # - * - # 0 0 5 - + 0 #" = - 0 " 0 • # 0 # # -H - * 61 = " 77 • 3 7@4 0 5# 6" • # " • E " • & • % • A 0 # = " 0 " = 0 0 - 5 H - F# 5 " " - * 5 -6 0 E 6 " # - 5 = = 6 0 0 + " " 0 - " = - * 0 # 0 # - F# A " - * " = # # - - 0 0 # # H - E "0 = # # -* # E -) 0 0 " " " -H # 0 " 0 " # H(&H 0 0 " 0 )(; 0 = # " = -* - ) " = # ' % # * 63 7L4 0 E &# " 0 ( = 5 1 &%" " 0 E # 5 - 0 0 " ' " -* # 0 " = " - ) # " " -6- * #" 0 # +%! " 0 = - ) 0 " = # 5 #" 0 ---6 # # - 78 6 34$- " C 1 % - - " 3 74$ " C * ) & # ) ) - 0 & * -" &0 " A * D " ) 0 D " # 7MMM # " LL7 E ;G M- ML(A* - LL7-7>I@IM 3 84+- R- $ " C & $ E EF H 3 94$- - - H " C & 0 1 F E = &)% 7MM:18" 7I:E7@8 1 D " " 7MM7 #= " C & 3 >4+ D " ' A 0 " F- % 3 :4&- 0 0 " 785 :6" " C & % A # * H D " ) - 7M E77M" + =+ D " * $ 7MM: = A - M7E 8:" - 8:E:8" I" 7MM7 3 I4$-%- # " - #" %- & #" $- % % D "& " C + # % * " 5 '& 6" A 7MM: 3 @4$- " !- " C D " ) 1 3 ) = 0 " % H 4 " K - >" A - 9- 5LLL6" - 8E @ 3 L4 )* * * = 3M4A = " ; - &V 8 " C )*< U " $- % " ) 7MMI" F 3 4!- " - 0 " -" ! 0 F * & W MI" E: $ # " A - 7MMM - & " C " D " * " " C " A 0 P =" 7MM " = D " ) " X * 0 % ) + X D " $- - 78:E7:M0 = ? ) 5 ) 6" " LLL 394H- + " A 0 F E 0 & " K # "C D H # - 384B- O " $- ? # -R ? "C & * 7MMME8@ " 374 - #" $-%- D " )&* " " - < - R =D ") ? " - & NC % $ & EF & * & % " + - 7MM@" K 1 7" ) 1 % # " " - I9E@I 3:4 - ! ? ?" - " +- " $- % " C 1 " * 0 % "+ D " A 3>4+ 5 7@(M8(7MM>6 7MM>" 0 1((0 IMM - 3I4%- %Y# Y - I" - IL9E@M9 ? 
" ( " $- - I" ( = - 2G S " M- & J S 0 A 0 " 3@4 - " $- % * 3L4 &# 3 7M4 $- "$ - F D " %) #" " - " &- % W M:" F - A = " %" +- = D " ! # E7 7MMI D " '&)% ;W MI" % ? #" IMM " C ) #" C A 0 & "% " 8EI " C ' "& #" $- #" F- & " 7MMI EH F 7MM:" 7" " $- % " - % " C ) ' - II:EIIL -A ; " R- R 1 ; " - 0 " % 79 & D " ; 7MM@ 3 7 4- ! " - #" $- % & 3 774 - ! " & D " #" $- % 0 #" W M>" @E M$ R H 7MM>" = & ) 7MM - #= "! 0 " - E : " H& W M@" R & (M8EMLE :" 3 794; !- ;+ " 3 7:4%- - + - " +- + ( E =#" 3 7I4 - -& " $- % ( 2 " - - " =" " ; G (M8E MEM9" 7MM9 " - 7-M" & (- " C H F #" $ #% " & # " $- - $ ? D " A &" H 1((000- 7-M 1((000- 3 7>4+- F " 7MM@ 3 784; !- ; ! ' ! " H " C D ": ! * " C + &# - + ; # &(' =" < 7MM:" & *- ! G E 5 7MM:6 " + 0 =- " 7MM8 " C 0 H D " ) - - $ # 7MM8 3 7@4$-%- # " %- #" - #" C % 5 '& 6" :EL A 3 7L4 ' % % D& + & * < MI" 7MMI * " F " ; ! & " ; ! A 1 (MIEM@EM9 7: '- B; $ "#0 - 504 - " , '< 0 '#0. .$ ,$ # #$ 03 1$ $ ! 1 " '- '- " '- $ "#0 - 504 ( - #+ # 1$ $ ! 1 " '- '- " " ( #+ # c (d '> > 504< 6 9 6 B ?: c ( d '> _/ 1 , E .$ ,$ # #$ 03 " , D _< % % '0 < _1 9 D > 504< $ / 5#" # < " < @+ A T0 3#0 / B ?@ % " $ "< , " C 5. B 1< "( _< ;;? ) , '0 < " S < c (=d T0 3#0 $ "< '> > 504< , 5. 1< , '0 _ ' " ( " C . 5 + # 5#"< !"5< 1! . # >_< E#' / S < ' (< " < + T0 1 $ <1 E0*#$ " T< > c (;d '- 5 <* E$ 1 ,< * +'> 1 5 > (_< 5 # Z X > c (@d'> > 504< 5 X Y ! (!0!3$ #< > 1 "/ 4 1 3 /< _ 5, "1 / # > ' "0 , 1< '> > , , 33< _ 1 # ' > 504< * Y 5 # ' + <5 >< ; 5 > , #< " 0 F ,< * + > 4< 3 . ,# <* + Z " "( < > 2< " < ; c (:d * > 5 . T 3"$ T< 1 , 3V< #> Z$ .T < " # / V0,,$ < 2 .$ "$ F< '> > 504< - ,$ 5$ '< ' 10,Z$ 3< _5 ' # _< #> T > Z X > " @?+:=< T > >< < B+ ; c (Bd '> > 504< T0 3#0 $ "< 2 c ( d" '> '> B # > c ( d" / 2 # 5 +/ D ( 0 R eE ! ,< '> > " > 504< * 1 , '< 0 '#0. 6 _< +/ " _< C > " 3 V 3< '> 5#" ' 1 :% :)< > 504< 0 F ,< ! (!0!3$ #< * + > 1$ ! > 504< _/ 511 ' # 2 X" > ( ( 511< $ ( < :< + = +" #< 1< A > ! 
,#'- E< T0 3#0 $ "< > C < Z" 9;< > 504< 5 X Y , #< _- >+, 1 ( 5 + "( R" 1 c (?d 5 X Y , #< / . 3< * 0 1 /0 ! "< 4 "0 ,< _ 5 "( 5 !+; " * " 1" "( <T C > = < @@7 " 7 :7:;=:? 1$ ! !0 . .$ "$ F< _ 6 _< f = ? ??? @ D c ( d '> 1 * $ < #< * , X_< 5 > $ 1 :;=:?< +, $ " ,$ '0#< _#C " 5 +! -$ ,$ _< ( B@ '- 504 $ "#0 - 2 5 - _< $ ' c ( d'> 1 $ 5 'SB< 6 V< '> C <^ Z (Z $ ' <E B< $ <" "( B% > 504< * <" B< ?< T0 3#0 $ "< '> T > Z X > < + c (Bd* _5 "1 / + > 1 , - 4< 1( # > ' c ( d* + > _< "1 1 , - 4< 1 / # > 3 /T- < 1 , - 4<_ _< $ "# 5 <- c ( d 3 /T- < * ,$ '0#< _ > T > Z X > < c ( d'> ' B: !$ $ < ]0 <^ < 1 ;= R X P @: > 504< _ " C , "1 9@< > , > ' < C 2 .$ "$ F< " "( > 504< ' < Z" S > - '- 5 3$ <* $ _< 6 < > - '- 5 3$ <* 6 " @ ;< ,$ '0#< _< @ < " < @+? B " ! $ < '> < Z" S < > 504< * ?+ ,$ '0#< '> > 504< * + # $ " SB< +@ A B< . 1 ,0 5 < C > 504< _1 9B< 1 6 " C 6 " < ' " > 504< * - 4< '> "1 . Z " ( < Z" 9 < 4< '> : + 5 - "-< '> > 504< > > 0 L ( > $ " ! $ <* c ( =d, U !01 1< '> > > Z X > " C c ( @d '- " 1 ,$ '0#< ]$ > 5 ' C > 504< * ( =@+ ; < T >< > [ , #< _ _< $ 5 '9@< . <" c ( d, U !01 1< '> > 504< * 6 5 ' 5 * :< 5(X <! & < + @% ' Z 3 C X '05 ) c ( ;d '- T0 3#0 $ "< '> > 504< , 5. 1< _ ( " C "( _< / T > > Z X > < Z" 9 < :?+B;< T > >< < ?+= c (:d ( _< " T > >< c (?d > ' . 6 ' c (;d" # ! ,< > E$ 301< * 6 ,$ '0#< _ ! > '> L _< "$ < #> FF$ F! " < '> % " )< c (@d T ( 1 " $ ,$ '0#< E 01 $ ! V< 1 0 5 + " C > Z X > " C < 01 $ ! * Z < 1$ $ ! ) > 504< * ' ; >T c (=dE #> >& .$ ,$ # #$ 03 6 ,$ '0#< _/ "1 "( +Z . @ ' _< ' C ' ' ,$ '0#< _ 6 _< @ >T ;=+;B< T >< ,$ '0#< '> @?< > 504< _, . B ,$ '0#< '> > 504< _ , '< 0 '#0. 6 _< @ > S:< + 0 > > <5 3 C" " '- $ "#0 - 504 - " ' B B% (( " # $ #+ # " ! 
cd * 5$ #0, < _#> C ==%@) :+= < 5 ( ??@ cd * (+ =+ 5$ #0, < _' _< >1 c ;d " $ - 4T$ 3< _' *" '< =%@)< c @d _"1 / c Bd 5 5 > <_ $ @ +@e5 ( ( $ # + @ </ C _< $ C '- " < < =+ 5 L < " _< <$ ".3+ < " <5 ( Z C ' 7 _< 7 2 # > 6 " < ?+ = 5 ( = C % ) '3" ' cd 4 "0 ,< ]5 ( > > 6 $ ' 5 ;;R@=< $ > < $ (< 5 ( ??; c ?d 5 X Y 6 '- ) > > >< "C L/ 3!-+.0 0 * 34< ]" <^ B >3! 4 3< _" ' L ' # . + 1 " > $ < 1$ $ ! "$ '0SB< '0 @ 4 . X_< > 77CCC g 27( X7?B g @g c :d . > _< 1< _" C 5 ' @ + e$ ".3+ = ?B + + =+ c =d * 1 .$ ,$ # #$ 03 , #< _0 > ( > ( 5 & C > (<^ ' > $ %5 '" 9 ?;)< "( 5 > 9 _< #>& ' < : c dCCC cd + " ! c d]* -$ 5 $ < ]. <^ > C (< 5 # "( $ c=d " . < " c@d V c:d0 > " <5 " <^ > </ E Z 1 77CCC L > < ! c?d #"$ " % > 77CCC 77CCC 7> >< E 5 7Z X > 5 ( C B L '> ' " C < , ( 77CCC < ]# E " > Z 7 , 7 5 $ 57 "5<^ "' 7 "' cBd !3 c dF 5 < $ 3 C B 1 # 9B< T %*# ") " <^ < ]"' c;d! ! , A > < >> 77 j < ]" C ( "$ * <^ @< " "( 7 > 7 > / < " # :< " > @ ) 7 , '< 0 '#0. " 2 BB '- $ "#0 - 504 c d/''< ]" 3 c dE 1( 5 + 5 (# X/ ^< .$ ,$ # #$ 03 #1 X 1$ $ ! 3 1 " + =@< '- '- " =@+@=< B< 01 $ ! V< T 50 ""3 < > # / V0,,$ <_ " 0 . < " <5 +' '15 _< $ $ "( ' % $ 5 ')< . <! (< " 1 $ @ c dMohamed GHOZZI, Mischa DOHLER, Francois MARX, Jacques PALICOT, “Cognitive radio : methods for the detection of free bands”, Comptes rendus Physique, Elsevier, 2006, vol. 7, n 7, pp. 794-804 c=dDuminda THILAKAWARDANA, Klaus MOESSNER, “A Genetic Approach to Cell- by-Cell Dynamic Spectrum Allocation for Optimising Spectral Efficiency in Wireless Mobile Systems”, CrownCom’07, 31 July-3 August 2007, Orlando, USA c;d> 77CCC c@d#> (! C ; 7 31 $ <4 "0 > ' =R = < 5 c:d5 X Y ,< ]/ > <^ / " 5 > +5 > < / 5 <* ' +1 = , #< _5 > ( " cBd* 1 6 9 ,# - #< #L L '00 B< = A 0 A +0 c = d T ,0 k# VV [< _" C > >< B c =;d c =@d B 0, 31< * _< $ ' + > + > +/ 6 C ,$ '0#< _ 2 _< $ - " " ,$ 3#< '> "( B 1(" 3 , ' 0 5 C X > < % ' < 5,< 5 % T ,)_< 1= _$ (" > $ "' "S:< $ & 5 ! 
"#$ <, L !$ 03$ <] 1( " 3 C/ 1( B< B+ B< 1 $ ,$ '0#< _ 3 C ' 5 L <A ^< Z = ( _ 1= = _ " / = E0*#$ " T< ^ ^< 5 B< . < ' , 6 . < 1< 1 2 R= \ ) ( +1" #$':2_< < 9 , c = d* ( V+ 05 0< 0 3 0 +1 ' 1(" 3 B< $ $ " 3 C X < c ==d'> * " 1 , - 4<_ _< #>& c = dV $ 3" 0, 31< ]//# < #> < / < , !-0 Z 4 ,< 4 > > //# T <! & < : + > , B + 9 ,$ '0#< '> <^ $ '#9= ' c d c?d* > + 2 '5 $ $ %5 50'01 9=)< > , '< 0 '#0. > > ( '- $ "#0 - 504 ( c =:d 5 " + > 1B _ ' < Z> . c =Bd5 > 9 > .$ ,$ # #$ 03 " > 1$ $ ! 1 " '- '- " _ 6 1 / $ < _ !-0VV$ < _1 2 > < " 2 S ! , '< 0 '#0. 2 ' _< #>& BA B? Résumé Le manuscrit de cette Habilitation à Diriger des Recherches reprend quelques thèmes de mon parcours de recherche depuis 10 ans, et que l’on peut résumer ainsi. De la conception radio classique pendant ma thèse (à base d’ASIC) j’ai vécu la transition vers la conception radio logicielle (dans un laboratoire de recherche d’un grand groupe industriel) et enfin j’étends désormais en parallèle mes travaux vers la conception radio intelligente (dans un laboratoire de recherche académique). Cinq travaux sont repris dans ce mémoire, portant sur la radio logicielle et la radio intelligente. Ils correspondent à cinq des thèses que j’ai encadrées. Ils portent tout d’abord sur la contribution à une approche de conception pragmatique pour la radio logicielle orientée composants. La radio logicielle en effet implique l’utilisation de ressources de traitements multiples et hétérogènes, générant ainsi des difficultés de conception non encore résolues dans leur ensemble par la communauté de l’architecture. L’un des principes repose sur l’indifférence qu’il doit y avoir entre traitements logiciels et matériels, quel que soit leur niveau de reconfigurabilité. Un effort de formalisation de la conception radio logicielle est proposé dans un deuxième temps, dans une approche orientée opérateurs communs. 
L’idée repose sur la description à différents niveaux de granularité des opérateurs de traitement du signal afin de bénéficier de la réutilisation de motifs de calcul. Le travail d’optimisation consiste à sélectionner le niveau de granularité adéquat suivant les besoins du concepteur. La gestion de reconfiguration, qui est un point central si ce n’est le point distinctif de la radio logicielle, est le cœur du troisième travail présenté. Sa généralisation à la radio intelligente permet d’aboutir dans un quatrième cas d’étude à la proposition d’une architecture de gestion radio intelligente : HDCRAM. Une modélisation de cette architecture est proposée sous la forme d’un méta-modèle exécutable. Cela permet notamment de disposer d’un simulateur de scénarios du comportement de l’architecture de gestion d’une radio intelligente et de pouvoir la dimensionner pour chaque scénario. Enfin, la dernière étude porte sur la définition du concept de bulle sensorielle radio intelligente qui porte particulièrement sur l’ensemble des capteurs d’un équipement de radio intelligente. Les perspectives de ces travaux sont notamment d’étudier le troisième et dernier élément du cercle cognitif simplifié (après la reconfiguration et les capteurs) qu’est la prise de décision. Abstract The manuscript of the present Habilitation à Diriger des Recherches covers a few topics of my research activities during the past 10 years, from classical radio design, to software radio design, and finally towards cognitive radio design. Five of my PhD students' works on software radio and cognitive radio are detailed here. First is a contribution to a pragmatic approach for software defined radio design. It is a component-based approach. Software defined radio indeed implies multiple and heterogeneous resources, which generates design issues that have not been solved yet. A formalisation effort of software defined radio design is proposed in a second work, using a common operators design approach.
A graphical optimisation tool is proposed to select the adequate level of granularity of signal processing operations depending on the designer’s needs. Reconfiguration management, which is the main distinctive point of software defined radio, is the heart of the third work presented in this document. The generalisation of this to cognitive radio leads to a fourth work on the proposal of a completely new cognitive radio management architecture: HDCRAM. A modelling approach of this architecture is proposed through an executable metamodel. This also constitutes a simulator of scenarios of the behaviour of a cognitive radio management architecture. Finally, the last work proposes the definition of the sensorial cognitive radio bubble that particularly tackles the sensing side of cognitive radio equipment. Perspectives for this work consist in studying the third and last element of the cognitive cycle (added to reconfiguration and sensors): decision making.