Metinde en çok geçen 3 kelime problemi

Merhaba;

Problemi codewars’ta buldum.

Sonradan kısaltmak üzere Python ile şöyle bir şey yazmıştım (ama olmadı):

def get_frequences(text):
    """Map each distinct item of *text* to its share of all items.

    *text* is any sequence offering ``count`` and ``len`` (a list of words or
    a string). The share is ``occurrences / len(text)``.
    """
    frequencies = {}
    for value in set(text):
        frequencies[value] = text.count(value) / len(text)
    return frequencies

def top_3_words(text):
    """Return up to three most frequent words of *text*, most frequent first.

    A word is a run of ASCII letters that may contain apostrophes. Matching
    is case-insensitive and the returned words are lowercased. When the text
    holds fewer than three distinct words, only those are returned; a text
    without any word yields an empty list.
    """
    # Local imports keep this snippet runnable on its own.
    import re
    from collections import Counter

    # Tokenise rather than delete a fixed punctuation list: every character
    # that is neither a letter nor an apostrophe acts as a separator, and
    # apostrophes inside a word ("won't") are kept instead of being removed.
    tokens = re.findall(r"[A-Za-z']+", text)

    # Lowercase BEFORE counting so "DDD", "ddd" and "DdD" are one word, and
    # skip tokens consisting only of apostrophes, which are not words.
    counts = Counter(token.lower() for token in tokens if token.strip("'"))

    # most_common already handles the "fewer than three words" case.
    return [word for word, _ in counts.most_common(3)]

Test case’ler:

# The test cases are from Codewars, but since I could not work out where
# its Test helper lives, here is a stand-in:
class Test:
    """Minimal stand-in for the Codewars ``Test`` helper."""

    @staticmethod
    def assert_equals(a, b):
        """Compare *a* with *b* without raising.

        Returns ``True`` when both are equal, otherwise a message string
        describing the mismatch.
        """
        if a == b:
            return True
        return f"{a} should equal to {b}"

# Sample cases for top_3_words. Each call passes the function's result and
# the expected list to Test.assert_equals; the value returned by
# assert_equals is not used here.

# Plain frequency ordering with repeated spaces between words.
Test.assert_equals(top_3_words("a a a  b  c c  d d d d  e e e e e"), ["e", "d", "a"])
# Mixed-case variants and punctuation attached to words.
Test.assert_equals(top_3_words("e e e e DDD ddd DdD: ddd ddd aa aA Aa, bb cc cC e e e"), ["e", "ddd", "aa"])
# An apostrophe inside a word makes it a different word ("won't" vs "wont").
Test.assert_equals(top_3_words("  //wont won't won't "), ["won't", "wont"])
# Fewer than three distinct words.
Test.assert_equals(top_3_words("  , e   .. "), ["e"])
# Inputs without any word: punctuation only or apostrophes only.
Test.assert_equals(top_3_words("  ...  "), [])
Test.assert_equals(top_3_words("  '  "), [])
Test.assert_equals(top_3_words("  '''  "), [])
# A longer multi-line passage, including a hyphenated "lance-rack".
Test.assert_equals(top_3_words("""In a village of La Mancha, the name of which I have no desire to call to
mind, there lived not long since one of those gentlemen that keep a lance
in the lance-rack, an old buckler, a lean hack, and a greyhound for
coursing. An olla of rather more beef than mutton, a salad on most
nights, scraps on Saturdays, lentils on Fridays, and a pigeon or so extra
on Sundays, made away with three-quarters of his income."""), ["a", "of", "on"])

Ben 2. ve 3. testlerde takıldım. Başka dillerle yapmak isteyen olursa buyursun.

Kolay gelsin

Şimdi öncelikle, soruda verdikleri örnekler yanıltıcı; çünkü örneklere bakarak gelecek metinlerin yazım kurallarına uygun olduğunu düşünüp ona göre işlem yapıyorsunuz. Ama rastgele metinlerde şöyle örnekler var:

in'uIL::.-JofUjJ,,?oJgDPCms;LLTACRZw?! /JofUjJ ?!Kge/JofUjJ,/-.!in'uIL:._,,LLTACRZw?vhiTDLX qsugWqt_,,;,Kge?!/:/JofUjJ /;Kge,,!.vhiTDLX.?in'uIL,:-,LLTACRZw...? LLTACRZw-!oJgDPCms,;// jNPJE,,,,_JofUjJ.-..:JofUjJ!.:,in'uIL-  ,!in'uIL-!!,rkFglqTFt-;?LLTACRZw?:-;in'uIL;_/vhiTDLX-;?/;rkFglqTFt? /-oJgDPCms.oJgDPCms,/vhiTDLX./vhiTDLX/JofUjJ,!vhiTDLX//:-VRxS_jNPJE:/ /;drSzPVwtS,-.qsugWqt??drSzPVwtS,_ -?in'uIL?in'uIL:./??Kge -? vhiTDLX:XrH_/in'uIL;;/_,drSzPVwtS-LLTACRZw!!qsugWqt: ?!:in'uIL.-;-VRxS!LLTACRZw.!in'uIL  jNPJE:?in'uIL?vhiTDLX./._?drSzPVwtS/ /XrH-XrH;:!,:Kge.:-LLTACRZw.-!;_drSzPVwtS,: !drSzPVwtS. ,!LLTACRZw. ?in'uIL-?oJgDPCms.Kge-!_LLTACRZw/:.!_jNPJE/Kge.,,qsugWqt.,in'uIL;.,?drSzPVwtS,-, JofUjJ,-;:oJgDPCms.!-XrH.jNPJE/?!/LLTACRZw  !..JofUjJ 

Bu da benim çözümüm

# coding: utf-8

import string
import re

def clear(text):
    """Normalise *text* into words separated by single spaces.

    Every character other than an ASCII letter, a space or an apostrophe is
    treated as a separator. Runs of separators collapse into one space and
    the result carries no leading or trailing space.

    Returns an empty string when *text* is ``None``.
    """
    if text is None:
        return ""

    valid = set(string.ascii_letters + " '")
    # Build the string in one pass instead of repeated concatenation.
    replaced = "".join(ch if ch in valid else " " for ch in text)

    # Strip AFTER substitution: punctuation at the edges has just become
    # spaces, and stripping earlier would leave them behind.
    return re.sub(r" +", " ", replaced).strip()

def words(text):
    """Count the words of an already cleaned, space-separated *text*.

    Returns a dict mapping each lowercased word to how many times it occurs,
    ignoring case. Tokens without at least one alphabetic character (empty
    strings, lone apostrophes) are not words and are left out. An empty
    *text* yields an empty dict.
    """
    from collections import Counter

    # Lowercase each token BEFORE tallying so that case variants such as
    # "DDD" and "ddd" add up under one key instead of overwriting each other.
    counts = Counter(
        token.lower()
        for token in text.split(" ")
        if any(ch.isalpha() for ch in token)
    )
    return dict(counts)

def top_3_words(text):
    """Return at most three words of *text*, ordered by descending count.

    The text is first normalised by ``clear`` and then tallied by ``words``.
    """
    counts = words(clear(text))
    ranked = sorted(counts, key=lambda word: counts.get(word), reverse=True)
    return ranked[:3]
1 Beğeni

Öncelikle:

class Test:
    """Stand-in for the Codewars ``Test`` helper that fails via ``assert``."""

    @staticmethod
    def assert_equals(a, b):
        """Assert that *a* equals *b*, reporting both values on mismatch."""
        message = f"{a} should equal to {b}"
        assert a == b, message
import re

#get_frequences = lambda text: {value: text.count(value) / len(text) for value in set(text)}
def get_frequencies(words):
    """Map each distinct word to its relative frequency in *words*.

    *words* is a sized collection of hashable items, e.g. a list of strings.
    The frequency is ``occurrences / len(words)``. Keys appear in order of
    first occurrence, so a later stable sort breaks ties deterministically.
    An empty input yields an empty dict.
    """
    from collections import Counter

    total = len(words)
    # Counter tallies everything in a single pass; calling words.count() for
    # each distinct word would rescan the whole list every time.
    return {word: count / total for word, count in Counter(words).items()}

def top_3_words(text):
    """Return at most three words of *text*, most frequent first.

    Words are extracted with a regular expression, lowercased, and ranked by
    the frequencies computed by ``get_frequencies``.
    """
    # Kata rule: a word is a string of letters (A to Z) optionally containing
    # one or more apostrophes (') in ASCII. (No need to handle fancy
    # punctuation.)
    found = re.findall('\'?[a-zA-Z]+[a-zA-Z\']*', text)

    # Kata rule: matches should be case-insensitive, and the words in the
    # result should be lowercased.
    lowered = [word.lower() for word in found]

    ranked = sorted(
        get_frequencies(lowered).items(),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Kata rule: if a text contains fewer than three unique words, then
    # either the top-2 or top-1 words should be returned, or an empty array
    # if a text contains no words.
    return [word for word, _ in ranked][:3]

Edit: Regex'i ([a-zA-Z']*)([a-zA-Z]+)([a-zA-Z']*) yapsak daha mı okunaklı olur acaba?

2 Beğeni