暇なのでPythonのサンプルをHaskellに移植してみました
「集合知プログラミング」のPythonのサンプルをHaskellで書いてみました。最近、協調フィルタリングのお勉強をしているのですが、コードを読んでいるだけではなかなかしっくりこないので、実際に書いてみました。
まず、Pythonのコード
映画の評価データ、Lisa Roseさんはそれぞれの映画を、2.5、3.5、3.0...で評価しているというデータ
#!/usr/bin/python #! -*- coding: utf-8 -*- critics = { 'Lisa Rose': { 'Lady in the Water': 2.5, 'Snake on a Plane': 3.5, 'Just My Luck': 3.0, 'Superman Returns': 3.5, 'You, Me and Dupree': 2.5, 'The Night Listener': 3.5 }, 'Gene Seymour': { 'Lady in the Water': 3.0, 'Snake on a Plane': 3.5, 'Just My Luck': 1.5, 'Superman Returns': 5.0, 'The Night Listener': 3.0, 'You, Me and Dupree': 3.5 }, 'Michael Phillips': { 'Lady in the Water': 2.5, 'Snake on a Plane': 3.0, 'Superman Returns': 3.5, 'The Night Listener': 4.0 }, 'Claudia Puig': { 'Snake on a Plane': 3.5, 'Just My Luck': 3.0, 'The Night Listener': 4.5, 'Superman Returns': 4.0, 'You, Me and Dupree': 2.5 }, 'Mick LaSalle': { 'Lady in the Water': 3.0, 'Snake on a Plane': 4.0, 'Just My Luck': 2.0, 'Superman Returns': 3.0, 'The Night Listener': 3.0, 'You, Me and Dupree': 2.0 }, 'Jack Matthews': { 'Lady in the Water': 3.0, 'Snake on a Plane': 4.0, 'The Night Listener': 3.0, 'Superman Returns': 5.0, 'You, Me and Dupree': 3.5 }, 'Toby': { 'Snake on a Plane': 4.5, 'You, Me and Dupree': 1.0, 'Superman Returns': 4.0 } }で、その評価データを利用して計算するmainコード、本に掲載されているとおり
#!/usr/bin/python
# -*- coding: utf-8 -*-
from MovieEval import critics


def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity score between two people.

    prefs maps a person's name to a dict of {item: score}.  Only items
    rated by both people are compared.  The result is
    1 / (1 + sum of squared score differences): 1.0 when every shared
    score is equal, approaching 0 as the scores diverge, and 0 when the
    two people have no rated item in common.
    """
    # Collect the shared items once; an empty list means "nothing to compare".
    shared = [item for item in prefs[person1] if item in prefs[person2]]
    if not shared:
        return 0

    sum_of_squares = sum(
        pow(prefs[person1][item] - prefs[person2][item], 2) for item in shared
    )
    # 1.0 keeps this a true division even on Python 2 with integer scores.
    return 1.0 / (1 + sum_of_squares)


def calcScore(name, prefs):
    """Print the similarity of `name` to every person in `prefs`."""
    print("%s:" % name)
    for nm in prefs:
        # Score against the data that was passed in, not the module global.
        print(" %20s %20f" % (nm, sim_distance(prefs, name, nm)))
    print("--------------------------------------------------")


if __name__ == '__main__':
    for name in critics:
        calcScore(name, critics)

これを実行すると
cuomo@karky7 ~ $ python recommendations.py Jack Matthews: Jack Matthews 1.000000 Mick LaSalle 0.137931 Claudia Puig 0.181818 Lisa Rose 0.200000 Toby 0.117647 Gene Seymour 0.800000 Michael Phillips 0.181818 -------------------------------------------------- Mick LaSalle: Jack Matthews 0.137931 Mick LaSalle 1.000000 Claudia Puig 0.173913 Lisa Rose 0.307692 Toby 0.307692 Gene Seymour 0.129032 Michael Phillips 0.285714 -------------------------------------------------- Claudia Puig: Jack Matthews 0.181818 Mick LaSalle 0.173913 Claudia Puig 1.000000 Lisa Rose 0.444444 Toby 0.235294 Gene Seymour 0.133333 Michael Phillips 0.571429 -------------------------------------------------- Lisa Rose: Jack Matthews 0.200000 Mick LaSalle 0.307692 Claudia Puig 0.444444 Lisa Rose 1.000000 Toby 0.222222 Gene Seymour 0.142857 Michael Phillips 0.666667 -------------------------------------------------- Toby: Jack Matthews 0.117647 Mick LaSalle 0.307692 Claudia Puig 0.235294 Lisa Rose 0.222222 Toby 1.000000 Gene Seymour 0.108108 Michael Phillips 0.285714 -------------------------------------------------- Gene Seymour: Jack Matthews 0.800000 Mick LaSalle 0.129032 Claudia Puig 0.133333 Lisa Rose 0.142857 Toby 0.108108 Gene Seymour 1.000000 Michael Phillips 0.210526 -------------------------------------------------- Michael Phillips: Jack Matthews 0.181818 Mick LaSalle 0.285714 Claudia Puig 0.571429 Lisa Rose 0.666667 Toby 0.285714 Gene Seymour 0.210526 Michael Phillips 1.000000 -------------------------------------------------- cuomo@karky7 ~ $Michael PhillipsさんとMichael Phillipsさんの評価はまったく同じなので、スコアは1.0になるわけで、当たり前ですよね。1に近ければ近いほど評価が同じということ。
これをHaskellで書いてみた
基本的には同じ処理です、まず評価データ

-- | Sample movie ratings used by the recommendation example.
module MovieEval
  ( Eval
  , movName
  , movEv
  , getCritics
  ) where

import qualified Data.Map as M

-- | One rating: the movie title and the score given to it.
data Eval = Eval
  { movName :: String
  , movEv   :: Double
  } deriving (Show, Eq)

-- | Ratings of every critic, keyed by the critic's name.
getCritics :: M.Map String [Eval]
getCritics = M.fromList
  [ critic "Lisa Rose"
      [ ("Lady in the Water", 2.5)
      , ("Snake on a Plane", 3.5)
      , ("Just My Luck", 3.0)
      , ("Superman Returns", 3.5)
      , ("You, Me and Dupree", 2.5)
      , ("The Night Listener", 3.5)
      ]
  , critic "Gene Seymour"
      [ ("Lady in the Water", 3.0)
      , ("Snake on a Plane", 3.5)
      , ("Just My Luck", 1.5)
      , ("Superman Returns", 5.0)
      , ("The Night Listener", 3.0)
      , ("You, Me and Dupree", 3.5)
      ]
  , critic "Michael Phillips"
      [ ("Lady in the Water", 2.5)
      , ("Snake on a Plane", 3.0)
      , ("Superman Returns", 3.5)
      , ("The Night Listener", 4.0)
      ]
  , critic "Claudia Puig"
      [ ("Snake on a Plane", 3.5)
      , ("Just My Luck", 3.0)
      , ("The Night Listener", 4.5)
      , ("Superman Returns", 4.0)
      , ("You, Me and Dupree", 2.5)
      ]
  , critic "Mick LaSalle"
      [ ("Lady in the Water", 3.0)
      , ("Snake on a Plane", 4.0)
      , ("Just My Luck", 2.0)
      , ("Superman Returns", 3.0)
      , ("The Night Listener", 3.0)
      , ("You, Me and Dupree", 2.0)
      ]
  , critic "Jack Matthews"
      [ ("Lady in the Water", 3.0)
      , ("Snake on a Plane", 4.0)
      , ("The Night Listener", 3.0)
      , ("Superman Returns", 5.0)
      , ("You, Me and Dupree", 3.5)
      ]
  , critic "Toby"
      [ ("Snake on a Plane", 4.5)
      , ("You, Me and Dupree", 1.0)
      , ("Superman Returns", 4.0)
      ]
  ] -- End of List
  where
    -- Pair a critic's name with their (title, score) list turned into Evals.
    critic :: String -> [(String, Double)] -> (String, [Eval])
    critic who ratings = (who, [Eval title score | (title, score) <- ratings])

で、評価の計算コード
import qualified Data.Map as M
import MovieEval
import Control.Monad (forM_)
import Text.Printf
import Debug.Trace

-- | Distance-based similarity between two critics.
--
-- The result is @1 / (1 + sum of squared score differences)@ over the movies
-- both critics rated.  It is 0 when either name is missing from the map or
-- when the two critics share no movie, matching the Python version.
--
-- Written so that applying the first two arguments yields a reusable
-- @String -> Double@ scorer for a fixed first critic.
sim_distance :: M.Map String [Eval] -> String -> String -> Double
sim_distance prefs person1 = \person2 ->
  case (ratings1, M.lookup person2 prefs) of
    (Just r1, Just r2) -> similarity r1 r2
    _                  -> 0
  where
    -- Looked up once and shared by every call of the returned function.
    ratings1 = M.lookup person1 prefs

    similarity :: [Eval] -> [Eval] -> Double
    similarity r1 r2
      -- No movie in common: nothing to compare, so no similarity.
      | null diffs = 0
      | otherwise  = 1 / (1 + sum (map (^ (2 :: Int)) diffs))
      where
        diffs =
          [ movEv x - movEv y
          | x <- r1
          , y <- r2
          , movName x == movName y
          ]

-- | Print the score of @name@ against every entry of @names@ using @f@.
calcScore :: String -> [String] -> (String -> String -> Double) -> IO ()
calcScore name names f = do
  putStrLn $ name ++ ":"
  forM_ names $ \nm ->
    printf " %20s %20f\n" nm (f name nm)
  putStrLn "--------------------------------------------------\n"

-- | Print the similarity table for every critic in the sample data.
main :: IO ()
main = do
  let prefs = getCritics
      names = M.keys prefs
  mapM_ (\name -> calcScore name names (sim_distance prefs)) names

これをHaskellで実行すると
cuomo@karky7 ~ $ runhaskell recommendations.hs Claudia Puig: Claudia Puig 1.0 Gene Seymour 0.13333333333333333 Jack Matthews 0.18181818181818182 Lisa Rose 0.4444444444444444 Michael Phillips 0.5714285714285714 Mick LaSalle 0.17391304347826086 Toby 0.23529411764705882 -------------------------------------------------- Gene Seymour: Claudia Puig 0.13333333333333333 Gene Seymour 1.0 Jack Matthews 0.8 Lisa Rose 0.14285714285714285 Michael Phillips 0.21052631578947367 Mick LaSalle 0.12903225806451613 Toby 0.10810810810810811 -------------------------------------------------- Jack Matthews: Claudia Puig 0.18181818181818182 Gene Seymour 0.8 Jack Matthews 1.0 Lisa Rose 0.2 Michael Phillips 0.18181818181818182 Mick LaSalle 0.13793103448275862 Toby 0.11764705882352941 -------------------------------------------------- Lisa Rose: Claudia Puig 0.4444444444444444 Gene Seymour 0.14285714285714285 Jack Matthews 0.2 Lisa Rose 1.0 Michael Phillips 0.6666666666666666 Mick LaSalle 0.3076923076923077 Toby 0.2222222222222222 -------------------------------------------------- Michael Phillips: Claudia Puig 0.5714285714285714 Gene Seymour 0.21052631578947367 Jack Matthews 0.18181818181818182 Lisa Rose 0.6666666666666666 Michael Phillips 1.0 Mick LaSalle 0.2857142857142857 Toby 0.2857142857142857 -------------------------------------------------- Mick LaSalle: Claudia Puig 0.17391304347826086 Gene Seymour 0.12903225806451613 Jack Matthews 0.13793103448275862 Lisa Rose 0.3076923076923077 Michael Phillips 0.2857142857142857 Mick LaSalle 1.0 Toby 0.3076923076923077 -------------------------------------------------- Toby: Claudia Puig 0.23529411764705882 Gene Seymour 0.10810810810810811 Jack Matthews 0.11764705882352941 Lisa Rose 0.2222222222222222 Michael Phillips 0.2857142857142857 Mick LaSalle 0.3076923076923077 Toby 1.0 -------------------------------------------------- cuomo@karky7 ~ 
$
Pythonって直感的に書いていけるところはやはり早くていいなって思うところデスが、Haskellって関数を渡せるところが凄くいけてる感じがしますね。評価元となる人のパラメータで関数を一時的に保存しておいて、後から評価対象の人を渡すだけでグリグリ回せちゃうとこなんか感心してしまいます。チョットmapを多用し過ぎ感があったり、Applicative辺りを利用すればもっと綺麗に書けそうな気がしてならないのですが、ただいまHaskell勉強中なので、あまり突っ込まないでください。
でも、こういう書き方もあるよっていうのがあったら知りたい次第でござりまする、では
0 件のコメント:
コメントを投稿