Merhaba, bir proje yaptım ancak projenin kodları çok düz, belki biraz da amatörce yazıldı. Bu kodları OOP'ye (nesne yönelimli programlamaya) çevirmek istiyorum ancak yapamadım. Bu konuda bana yol göstermenizi rica ediyorum.
# Silence urllib3's warnings before any request is made.
urllib3.disable_warnings()

# Pull every stored ad URL from `adlinks_de_mail` into a one-column DataFrame.
sql = "SELECT ad_link FROM adlinks_de_mail"
mycursor = mydb.cursor()
mycursor.execute(sql)
myresult = mycursor.fetchall()
all_links = myresult[:]
len_all_links = len(all_links)
dataframe = pd.DataFrame(data=all_links, columns=['links'])

# Half-open row window [x, y) of `dataframe` that this run processes.
x, y = 4700, 4701
#def fonksiyon(i):
#global x
#global y
def _text_or_default(soup, tag, css_class, default=' '):
    """Return the text of the first ``tag`` element carrying ``css_class``.

    ``default`` is returned when ``soup.find`` locates no such element.
    """
    node = soup.find(tag, {"class": css_class})
    return default if node is None else node.get_text()


def _click_when_present(driver, xpath, timeout=10):
    """Wait up to ``timeout`` seconds for ``xpath`` to resolve, then click it."""
    WebDriverWait(driver, timeout=timeout).until(
        lambda d: d.find_element(By.XPATH, xpath)).click()


def _first_value(labels, values, label, default=""):
    """Return the item of ``values`` at the position of the first ``label``.

    ``default`` is returned when ``label`` is absent from ``labels``.
    """
    try:
        return values[labels.index(label)]
    except (ValueError, IndexError):
        return default


# ---------------------------------------------------------------------------
# One-time setup (loop-invariant, so it no longer runs once per ad).
# ---------------------------------------------------------------------------
config = configparser.RawConfigParser()
config.read(filenames='my.properties')
# NOTE(review): nothing in this section reads `config`; presumably the
# connection settings below were meant to come from it - confirm.

scrap_db = pymysql.connect(host='localhost', user='root', password='', database='mobile_maillll',
                           charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor)
cursor = scrap_db.cursor()

# Table layout for the scraped ads. The previous text could not be executed:
# DATE takes no length, a comma was missing and a column name had a space.
# download_date_time holds a "%Y%m%d_%H%M%S" string, so it is stored as text;
# first_registration is stored as text as well (TODO confirm the site's format).
sql = """CREATE TABLE IF NOT EXISTS CARS(
brand_and_model VARCHAR(32),
model_version VARCHAR(64),
location VARCHAR(64),
tel_number VARCHAR(32),
mileage VARCHAR(32),
first_registration VARCHAR(16),
ad_link VARCHAR(256),
download_date_time VARCHAR(32),
search_words VARCHAR(64),
url_id int(9)
)"""
# The DDL is only prepared here; run `cursor.execute(sql)` once if the table
# does not exist yet.

number = np.arange(x, y)
for i in tqdm(number):
    ad_link = dataframe.links[i]
    print(ad_link)

    # --- 1. Open the ad in a fresh browser --------------------------------
    # Image loading is switched off through the Chrome profile preferences.
    prefs = {"profile.managed_default_content_settings.images": 2}
    options = webdriver.ChromeOptions()
    options.add_experimental_option("prefs", prefs)
    time.sleep(5)
    service = Service(ChromeDriverManager().install())
    test = webdriver.Chrome(service=service, options=options)
    try:
        test.delete_all_cookies()
        sleep_time = 5
        test.get(ad_link)
        time.sleep(sleep_time)
        test.execute_script("window.resizeTo(1920,1080)")
        ad_source = test.page_source
        ad_soup = BeautifulSoup(ad_source, 'lxml')

        # --- 2. Phone number (needs live clicks) --------------------------
        # The consent click is best effort and kept separate, so a missing
        # banner no longer prevents the phone lookup.
        try:
            _click_when_present(
                test, "//button[@class='sc-bczRLJ iBneUr mde-consent-accept-btn']")
        except Exception:
            pass  # no banner within the timeout: carry on without it

        phone_xpath = "//p[@class='phone-number-container']"
        try:
            _click_when_present(test, phone_xpath)
            tel_number = test.find_element(By.XPATH, phone_xpath).text
        except Exception:
            tel_number = 'Not Found Tel Number'
        ic(tel_number)
        time.sleep(1)

        # --- 3. Keyword check on the ad text ------------------------------
        search_words = ""
        try:
            web_text = test.find_element(By.XPATH, "/html/body/div[6]/div/div[2]/div[3]/div[1]")
            # Search the element's text: handing the WebElement itself to
            # `re` raised TypeError on every page.
            page_text = web_text.text
            words = ["Import", "Importauto"]
            search_words = [word for word in words if word in page_text]
            text_words = ''
            if search_words:
                text_words = ", ".join(search_words) + "."
                ic(f"\nCannot send mail because it contains the word.Index : {text_words}")
                print("İf tamamlandı")
            print("Try tamamlandı")
        except Exception:
            text_words = "Not Found Words"
            ic(text_words)
    finally:
        # Always release the browser; previously one Chrome stayed open per ad.
        test.quit()
    time.sleep(1)

    # --- 4. Static fields from the parsed page source -----------------------
    brand_and_model = _text_or_default(ad_soup, "h1", 'h2 u-text-break-word')
    model_version = _text_or_default(ad_soup, "div", 'listing-subtitle')
    location = _text_or_default(ad_soup, "p", 'seller-address')
    # NOTE(review): the previous lookup used a blank tag name and class, so it
    # could never match; url_id has always been empty. Fill in a real source.
    url_id = ''

    cars_data = pd.DataFrame({
        'brand_and_model': brand_and_model,
        'model_version': model_version,
        'location': location,
        'tel_number': tel_number,
        'url_id': url_id,
    },
        index=[0])

    # --- 5. Key-feature label/value pairs ------------------------------------
    # Both lists start empty for every ad so that values from the previous
    # iteration can never leak into this one, and they are filled pairwise so
    # they always stay aligned.
    description_list = []
    value_list = []
    table_pre = ad_soup.find("div", {"class": "cBox cBox--content cBox-body"})
    if table_pre is not None:
        all_title = table_pre.find_all("div", {"class": 'key-feature__label'})
        all_results = table_pre.find_all("div", {"class": 'key-feature__value'})
        for title_node, value_node in zip(all_title, all_results):
            description_list.append(title_node.text.replace(" ", "_"))
            value_list.append(value_node.text)

    # --- 6. One-row frame: fixed fields + one column per feature label ------
    df1 = pd.DataFrame(list(zip(description_list, value_list)), columns=['description_list', 'value_list'])
    df1 = df1.set_index('description_list').T.reset_index(drop=True)
    df1 = df1.rename_axis(None, axis=1)
    df1['link'] = ad_link
    df1.insert(0, "brand_and_model", brand_and_model)
    df1.insert(1, "model_version", model_version)
    df1.insert(2, "location", location)
    # Position 5 only exists when there are enough feature columns; clamp it.
    df1.insert(min(5, len(df1.columns)), "tel_number", tel_number)

    # Merge columns that share a label by comma-joining their values. Grouping
    # the transpose replaces the deprecated `groupby(..., axis=1)`.
    df_last = df1.astype(str).T.groupby(level=0, sort=False).agg(','.join).T

    now = datetime.now()
    datetime_string = str(now.strftime("%Y%m%d_%H%M%S"))
    df_last['ad_link'] = ad_link
    df_last['download_date_time'] = datetime_string

    # --- 7. Prepare the INSERT for every collected row -----------------------
    for row_count in range(0, df_last.shape[0]):
        # Columns are read by name; positional reads broke whenever the number
        # of feature columns changed.
        row = df_last.iloc[row_count]
        brand_and_model = row["brand_and_model"]
        model_version = row["model_version"]
        location = row["location"]
        tel_number = row["tel_number"]
        ad_link = row["ad_link"]
        download_date_time = row["download_date_time"]
        mileage = _first_value(description_list, value_list, "Kilometerstand")
        first_registration = _first_value(description_list, value_list, "Erstzulassung")
        url_id = ""

        # A blank title means the page carried no ad; such rows are skipped.
        # `control` keeps its "true"/"false" string values for any later code.
        control = "false" if brand_and_model == ' ' else "true"
        if control == "true":
            # NOTE(review): the statement is only built in this section;
            # executing it and committing presumably happens in code that
            # follows - confirm.
            mySql_insert_query = "INSERT INTO CARS(brand_and_model,model_version,location,tel_number,mileage,first_registration,ad_link,download_date_time,url_id) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            val = (brand_and_model, model_version, location, tel_number, mileage, first_registration, ad_link, download_date_time, url_id)