principleMinor
Simple word translator taking advantage of Wikipedia
Viewed 0 times
simplewikipediawordadvantagetranslatortaking
Problem
This is my first Clojure script (simple word translator based on Wikipedia), and I guess there are things which could be simplified / done more idiomatic way. Specifically, I wonder if
get-translations couldn't be done better (it produces map lang code => term form regexp sequence).(ns wiki-translate
(:require [clojure.contrib.http.agent :as h])
(:require [clojure.contrib.string :as s])
(:import (java.net URLDecoder URLEncoder))
)
(defn url-decode
([text] (URLDecoder/decode text "UTF-8"))
)
(defn url-encode
([text] (URLEncoder/encode text "UTF-8"))
)
(defn get-url
([lg term] (str "http://" lg ".wikipedia.org/wiki/" (url-encode term)))
)
(defn fetch-url
([url] (h/string (h/http-agent url)))
)
(defn get-translations
([cnt] (apply sorted-map (flatten (for [s (re-seq #"interwiki-([^\"]+).*wiki\/([^\"]+)\".*/a>" cnt)] [(s 1) (s 2)]))))
)
(defn translate
([term src-lg tgt-lg] (
let [translations (get-translations (fetch-url (get-url src-lg term)))]
(if (contains? translations tgt-lg) (s/replace-str "_" " " (url-decode (get translations tgt-lg))) "")))
)
(defn prompt-read
([] (prompt-read ""))
([prompt] (print (format "%s: " prompt)) (flush ) (read-line))
)
(defn prompt-translate
([] (let [src-lg (prompt-read "Source language (en, fr, de ...)") tgt-lg (prompt-read "Target language (en, fr, de ...)") term (prompt-read "Term to translate")]
(println (str "\"" term "\" translated from " src-lg " to " tgt-lg " : " (translate term src-lg tgt-lg)))))
)
(while true (prompt-translate))Solution
Don't use the multiple-arity syntax for
Your code could use some line breaks here and there. This is how clojure is usually indented:
and:
Instead picking out the regexp groups by index, you should use destructuring:
... or better yet, I think, would be to do something like this:
defn unless you actually need to:(defn url-decode
[text]
(URLDecoder/decode text "UTF-8"))Your code could use some line breaks here and there. This is how clojure is usually indented:
(defn translate
[term src-lg tgt-lg]
(let [translations (get-translations (fetch-url (get-url src-lg term)))]
(if (contains? translations tgt-lg)
(s/replace-str "_" " " (url-decode (get translations tgt-lg)))
"")))and:
(defn prompt-translate
[]
(let [src-lg (prompt-read "Source language (en, fr, de ...)")
tgt-lg (prompt-read "Target language (en, fr, de ...)")
term (prompt-read "Term to translate")]
(println (str "\"" term "\" translated from " src-lg " to " tgt-lg " : " (translate term src-lg tgt-lg)))))Instead picking out the regexp groups by index, you should use destructuring:
(defn get-translations
[cnt]
(apply sorted-map
(flatten
(for [[_ a b] (re-seq #"interwiki-([^\"]+).*wiki\/([^\"]+)\".*/a>" cnt)]
[a b]))))... or better yet, I think, would be to do something like this:
(defn get-translations
[cnt]
(apply sorted-map
(mapcat
rest
(re-seq #"interwiki-([^\"]+).*wiki\/([^\"]+)\".*/a>" cnt))))Code Snippets
(defn url-decode
[text]
(URLDecoder/decode text "UTF-8"))(defn translate
[term src-lg tgt-lg]
(let [translations (get-translations (fetch-url (get-url src-lg term)))]
(if (contains? translations tgt-lg)
(s/replace-str "_" " " (url-decode (get translations tgt-lg)))
"<NOT FOUND>")))(defn prompt-translate
[]
(let [src-lg (prompt-read "Source language (en, fr, de ...)")
tgt-lg (prompt-read "Target language (en, fr, de ...)")
term (prompt-read "Term to translate")]
(println (str "\"" term "\" translated from " src-lg " to " tgt-lg " : " (translate term src-lg tgt-lg)))))(defn get-translations
[cnt]
(apply sorted-map
(flatten
(for [[_ a b] (re-seq #"interwiki-([^\"]+).*wiki\/([^\"]+)\".*/a>" cnt)]
[a b]))))(defn get-translations
[cnt]
(apply sorted-map
(mapcat
rest
(re-seq #"interwiki-([^\"]+).*wiki\/([^\"]+)\".*/a>" cnt))))Context
StackExchange Code Review Q#10126, answer score: 7
Revisions (0)
No revisions yet.