# Naver VIEW search scraper with a small Tkinter front end.
#
# Required libraries: time, selenium, pandas, tkinter (messagebox, ttk) and
# tqdm; urllib.parse is used to encode the search subject into the URL.
import time
import tkinter as tk
from tkinter import messagebox, ttk
from urllib.parse import quote_plus

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from tqdm import tqdm

SEARCH_URL = 'https://search.naver.com/search.naver?where=view&sm=tab_jum&query='
SCROLL_COUNT = 5
OUTPUT_FILE = 'output.xlsx'

POST_SELECTOR = 'ul.lst_total._list_base > li'
TITLE_SELECTOR = 'a.api_txt_lines.total_tit._cross_trigger'
DESCRIPTION_SELECTOR = 'div.api_txt_lines.dsc_txt'


def _scrape_posts(subject):
    """Scrape the Naver VIEW search results for *subject*.

    Opens a Chrome WebDriver session, loads the search page, presses
    PAGE_DOWN ``SCROLL_COUNT`` times and collects the title, description and
    link of every result item that contains both a title link and a
    description element.

    Args:
        subject: Search text; it is URL-encoded before being appended to
            ``SEARCH_URL``.

    Returns:
        A DataFrame with the columns ``Title``, ``Description`` and ``URL``
        (empty when nothing matched).
    """
    rows = []
    driver = webdriver.Chrome()
    try:
        # quote_plus keeps characters such as '&', '#' or spaces from
        # truncating or altering the query string.
        driver.get(SEARCH_URL + quote_plus(subject))
        time.sleep(3)  # fixed pause after navigation (presumably so results can load)

        body = driver.find_element(By.TAG_NAME, 'body')
        for _ in tqdm(range(SCROLL_COUNT), desc="검색 결과 스크래핑 진행 중"):
            body.send_keys(Keys.PAGE_DOWN)
            time.sleep(1)

        for post in driver.find_elements(By.CSS_SELECTOR, POST_SELECTOR):
            try:
                title_link = post.find_element(By.CSS_SELECTOR, TITLE_SELECTOR)
                description = post.find_element(By.CSS_SELECTOR, DESCRIPTION_SELECTOR).text
            except NoSuchElementException:
                # Items lacking a title link or a description are skipped.
                continue
            rows.append((title_link.text, description, title_link.get_attribute('href')))
    finally:
        # Always close the browser, even when scraping raises.
        driver.quit()

    return pd.DataFrame(rows, columns=['Title', 'Description', 'URL'])


def scrape_and_save():
    """Button callback: scrape results for the entered subject and save them.

    Reads the subject from the module-level ``subject_entry`` widget. A blank
    (or whitespace-only) subject shows an error dialog and returns. Otherwise
    the results are written to ``OUTPUT_FILE``, a completion dialog is shown
    and the data is displayed in a new window via ``show_excel_data``.
    """
    subject = subject_entry.get().strip()
    if not subject:
        messagebox.showerror("에러", "주제를 입력해주세요.")
        return

    df = _scrape_posts(subject)
    df.to_excel(OUTPUT_FILE, index=False)

    messagebox.showinfo("완료", f"데이터 스크래핑이 완료되었습니다.\n{OUTPUT_FILE} 파일을 확인하세요.")

    # Show the scraped table on screen.
    show_excel_data(df)


def show_excel_data(df):
    """Display *df* in a new Toplevel window as a Treeview table.

    Args:
        df: DataFrame to show; its columns become the Treeview columns and
            its index is shown in the leading "Index" column.
    """
    excel_window = tk.Toplevel()
    excel_window.title("Excel 파일 내용")
    excel_window.geometry("800x400")

    tree_view = ttk.Treeview(excel_window)
    tree_view["columns"] = tuple(df.columns)
    tree_view.heading("#0", text="Index", anchor="center")
    tree_view.column("#0", anchor="center", width=50, stretch=False)

    for column in df.columns:
        tree_view.heading(column, text=column, anchor="center")
        tree_view.column(column, anchor="center", width=150, stretch=False)

    # itertuples(name=None) yields plain (index, cell, cell, ...) tuples.
    for index, *cells in df.itertuples(name=None):
        tree_view.insert("", "end", text=index, values=tuple(cells))

    tree_view.pack(fill="both", expand=True)


# Build the GUI.
window = tk.Tk()
window.title("네이버 검색 결과 스크래핑")
window.geometry("400x200")

subject_label = tk.Label(window, text="주제를 입력하세요:")
subject_label.pack(pady=10)

subject_entry = tk.Entry(window, width=30)
subject_entry.pack(pady=5)

scrape_button = tk.Button(window, text="검색 결과 스크래핑", command=scrape_and_save)
scrape_button.pack(pady=10)

window.mainloop()
\uac01 \ub77c\uc774\ube0c\ub7ec\ub9ac\uc5d0 \ub300\ud574 \uac04\ub2e8\ud788 \uc124\uba85\ud569\ub2c8\ub2e4.<\/p>\n
time : time\uc740 \ud30c\uc774\uc36c \ub0b4\uc7a5 \ub77c\uc774\ube0c\ub7ec\ub9ac\ub85c, \uc2dc\uac04\uacfc \uad00\ub828\ub41c \uae30\ub2a5\ub4e4\uc744 \uc81c\uacf5\ud569\ub2c8\ub2e4. \uc608\ub97c \ub4e4\uc5b4, \ud504\ub85c\uadf8\ub7a8 \uc2e4\ud589 \uc18d\ub3c4\ub97c \uce21\uc815\ud558\ub294 \ub370 \uc0ac\uc6a9\ud558\uac70\ub098 \ud2b9\uc815 \ucf54\ub4dc \uc2e4\ud589\uc744 \uc9c0\uc5f0\uc2dc\ud0a4\ub294 \ub370 \ud65c\uc6a9\ub420 \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n
selenium : selenium\uc740 \uc6f9 \ube0c\ub77c\uc6b0\uc800 \uc790\ub3d9\ud654\ub97c \uc704\ud55c \ub3c4\uad6c\uc785\ub2c8\ub2e4. \uc8fc\ub85c \uc6f9 \ud398\uc774\uc9c0 \ud14c\uc2a4\ud2b8 \uc790\ub3d9\ud654\ub098 \uc6f9 \ud06c\ub864\ub9c1 \ub4f1\uc5d0 \uc0ac\uc6a9\ub429\ub2c8\ub2e4.<\/p>\n
pandas : pandas\ub294 \ub370\uc774\ud130 \uc870\uc791\uacfc \ubd84\uc11d\uc744 \uc704\ud55c \uac15\ub825\ud55c \ub77c\uc774\ube0c\ub7ec\ub9ac\ub85c, \ud45c \ud615\ud0dc\ub85c \ub370\uc774\ud130\ub97c \ub2e4\ub8e8\ub294\ub370 \uc720\uc6a9\ud569\ub2c8\ub2e4. DataFrame\uc774\ub77c\ub294 \uc790\ub8cc\uad6c\uc870\ub97c \ud1b5\ud574 \ub370\uc774\ud130\ub97c \uc27d\uac8c \ub2e4\ub8f0 \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n
tkinter : tkinter\ub294 \ud30c\uc774\uc36c\uc758 \ud45c\uc900 GUI (\uadf8\ub798\ud53d \uc0ac\uc6a9\uc790 \uc778\ud130\ud398\uc774\uc2a4) \ub77c\uc774\ube0c\ub7ec\ub9ac\uc785\ub2c8\ub2e4. \uc0ac\uc6a9\uc790 \uc778\ud130\ud398\uc774\uc2a4\ub97c \uac1c\ubc1c\ud558\uace0 \uad6c\uc131\ud558\ub294\ub370 \uc0ac\uc6a9\ub429\ub2c8\ub2e4.<\/p>\n
messagebox : messagebox\ub294 `tkinter`\uc758 \ud558\uc704 \ubaa8\ub4c8\ub85c, \uac04\ub2e8\ud55c \ud31d\uc5c5 \uba54\uc2dc\uc9c0\ub97c \ud45c\uc2dc\ud558\ub294 \ub370 \uc0ac\uc6a9\ub429\ub2c8\ub2e4. \uc0ac\uc6a9\uc790\uc5d0\uac8c \uc54c\ub9bc\uc774\ub098 \uac04\ub2e8\ud55c \uba54\uc2dc\uc9c0\ub97c \ubcf4\uc5ec\uc904 \ub54c \ud65c\uc6a9\ub429\ub2c8\ub2e4.<\/p>\n
ttk : ttk\ub294 `tkinter`\uc758 \ud14c\ub9c8\ud654\ub41c \uc704\uc82f\uc744 \uc81c\uacf5\ud558\ub294 \ubaa8\ub4c8\uc785\ub2c8\ub2e4. \ubcf4\ub2e4 \ud604\ub300\uc801\uc778 \ub514\uc790\uc778\uacfc \uc2a4\ud0c0\uc77c\uc744 \uc801\uc6a9\ud558\uc5ec GUI\ub97c \uac1c\ubc1c\ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n
tqdm : tqdm\uc740 \ub8e8\ud504\uc758 \uc9c4\ud589 \uc0c1\ud0dc\ub97c \ud45c\uc2dc\ud574\uc8fc\ub294 \ub77c\uc774\ube0c\ub7ec\ub9ac\uc785\ub2c8\ub2e4. \ubc18\ubcf5\ubb38\uc758 \uc9c4\ud589\ub960\uc744 \uc2dc\uac01\uc801\uc73c\ub85c \ubcf4\uc5ec\uc8fc\uba70, \uc791\uc5c5\uc774 \uc5bc\ub9c8\ub098 \uc9c4\ud589\ub418\uc5c8\ub294\uc9c0\ub97c \ud655\uc778\ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n
\uc704 \ub77c\uc774\ube0c\ub7ec\ub9ac\ub4e4\uc740 \uac01\uac01 \ub2e4\uc591\ud55c \uc6a9\ub3c4\ub85c \ud65c\uc6a9\ub418\uba70, \ud504\ub85c\uc81d\ud2b8\uc5d0 \ub530\ub77c \ud544\uc694\ud55c \uae30\ub2a5\ub4e4\uc744 \ud65c\uc6a9\ud558\uc5ec \uac1c\ubc1c\ud558\uc2dc\uba74 \ub429\ub2c8\ub2e4.<\/p>\n\n\n
2. scrape_and_save() \ud568\uc218: \uc774 \ud568\uc218\ub294 \uc6f9 \uc2a4\ud06c\ub798\ud551\uc744 \uc218\ud589\ud558\uace0 \uacb0\uacfc\ub97c \uc5d1\uc140 \ud30c\uc77c\uc5d0 \uc800\uc7a5\ud558\ub294 \uc5ed\ud560\uc744 \ud569\ub2c8\ub2e4.<\/p>\n\n\n\n
3. GUI \uc694\uc18c: \ucf54\ub4dc\ub294 Tkinter\ub97c \uc0ac\uc6a9\ud558\uc5ec \uadf8\ub798\ud53d \uc0ac\uc6a9\uc790 \uc778\ud130\ud398\uc774\uc2a4 (GUI)\ub97c \ub9cc\ub4ed\ub2c8\ub2e4. \ub808\uc774\ube14, \uc785\ub825 \uc0c1\uc790 \ubc0f \ubc84\ud2bc\uc774 \ud3ec\ud568\ub429\ub2c8\ub2e4.<\/p>\n\n\n\n
4. scrape_and_save() \ud568\uc218 \uc124\uba85:<\/p>\n\n\n\n
– \uc0ac\uc6a9\uc790\uac00 GUI\uc5d0\uc11c “\uac80\uc0c9 \uacb0\uacfc \uc2a4\ud06c\ub798\ud551” \ubc84\ud2bc\uc744 \ud074\ub9ad\ud558\uba74 \uc774 \ud568\uc218\uac00 \ud638\ucd9c\ub429\ub2c8\ub2e4.<\/p>\n\n\n\n
– \ud568\uc218\ub294 \uc0ac\uc6a9\uc790\uac00 \uc785\ub825\ud55c \uc8fc\uc81c\ub97c \uc785\ub825 \uc0c1\uc790\uc5d0\uc11c \uac00\uc838\uc635\ub2c8\ub2e4.<\/p>\n\n\n\n
– \uc8fc\uc81c\uac00 \uc785\ub825\ub418\uc5c8\ub294\uc9c0 \ud655\uc778\ud558\uace0, \uc785\ub825\ub418\uc9c0 \uc54a\uc558\ub2e4\uba74 `messagebox`\ub97c \uc0ac\uc6a9\ud558\uc5ec \uc624\ub958 \uba54\uc2dc\uc9c0\ub97c \ud45c\uc2dc\ud569\ub2c8\ub2e4.<\/p>\n\n\n\n
– Chrome\uc6a9 Selenium WebDriver\ub97c \ucd08\uae30\ud654\ud558\uace0, \uc785\ub825\ud55c \uc8fc\uc81c\uc5d0 \uae30\ubc18\ud558\uc5ec Naver\uc758 \uac80\uc0c9 \uacb0\uacfc \ud398\uc774\uc9c0\ub85c \uc774\ub3d9\ud569\ub2c8\ub2e4.<\/p>\n\n\n\n
– \ucf54\ub4dc\ub294 `Keys.PAGE_DOWN` \ub3d9\uc791\uc744 \uc0ac\uc6a9\ud558\uc5ec \ud398\uc774\uc9c0\ub97c \uc5ec\ub7ec \ubc88 \uc2a4\ud06c\ub864\ud558\uc5ec \ub354 \ub9ce\uc740 \uac80\uc0c9 \uacb0\uacfc\ub97c \ub85c\ub4dc\ud569\ub2c8\ub2e4(5\ud68c \ubc18\ubcf5).<\/p>\n\n\n\n
– \uadf8\ub7f0 \ub2e4\uc74c CSS \uc120\ud0dd\uc790\ub97c \uc0ac\uc6a9\ud558\uc5ec \uac80\uc0c9 \uacb0\uacfc\uc758 \uc81c\ubaa9, \uc124\uba85 \ubc0f URL\uc744 \ucd94\ucd9c\ud569\ub2c8\ub2e4.<\/p>\n\n\n\n
– \ucd94\ucd9c\ud55c \ub370\uc774\ud130\ub294 `titles`, `descriptions`, `urls`\ub77c\ub294 \uc138 \uac1c\uc758 \ubcc4\ub3c4 \ub9ac\uc2a4\ud2b8\uc5d0 \uc800\uc7a5\ub429\ub2c8\ub2e4.<\/p>\n\n\n\n
– \uc774 \ub9ac\uc2a4\ud2b8\ub4e4\uc744 \uc0ac\uc6a9\ud558\uc5ec pandas DataFrame\uc744 \uc0dd\uc131\ud558\uace0, DataFrame\uc740 `output.xlsx`\ub77c\ub294 \uc774\ub984\uc758 \uc5d1\uc140 \ud30c\uc77c\ub85c \uc800\uc7a5\ub429\ub2c8\ub2e4.<\/p>\n\n\n\n
– Selenium WebDriver\ub97c \uc885\ub8cc\ud558\uace0, \uba54\uc2dc\uc9c0 \ubc15\uc2a4\uac00 \uc644\ub8cc \uba54\uc2dc\uc9c0\uc640 \ucd9c\ub825 \ud30c\uc77c\uc758 \uc774\ub984\uc744 \ud45c\uc2dc\ud569\ub2c8\ub2e4.<\/p>\n\n\n\n
– \ud568\uc218\ub294 \ub610\ud55c `show_excel_data()`\ub97c \ud638\ucd9c\ud558\uc5ec \uc5d1\uc140 \ud30c\uc77c\uc758 \ub0b4\uc6a9\uc744 \uc0c8 \ucc3d\uc5d0 \ud45c\uc2dc\ud569\ub2c8\ub2e4.<\/p>\n\n\n\n
5. show_excel_data(df) \ud568\uc218 \uc124\uba85:<\/p>\n\n\n\n
– \uc774 \ud568\uc218\ub294 \uc0c8\ub85c\uc6b4 Tkinter Toplevel \ucc3d\uc5d0\uc11c \uc5d1\uc140 \ud30c\uc77c\uc758 \ub0b4\uc6a9\uc744 \ud45c\uc2dc\ud558\ub294 \uc5ed\ud560\uc744 \ud569\ub2c8\ub2e4.<\/p>\n\n\n\n
– \uc0c8 \ucc3d \ub0b4\uc5d0\uc11c \ub370\uc774\ud130\ub97c \ud45c \ud615\uc2dd\uc73c\ub85c \ud45c\uc2dc\ud558\ub294 Treeview \uc704\uc82f\uc744 \ub9cc\ub4ed\ub2c8\ub2e4.<\/p>\n\n\n\n
– Treeview \uc5f4\uc740 DataFrame\uc758 \uc5f4\uc744 \uae30\uc900\uc73c\ub85c \uc124\uc815\ub429\ub2c8\ub2e4.<\/p>\n\n\n\n
– DataFrame\uc758 \uac01 \ud589\uc5d0 \ub300\ud574 \ud568\uc218\ub294 \ub370\uc774\ud130\ub97c Treeview\uc5d0 \uc0bd\uc785\ud569\ub2c8\ub2e4.<\/p>\n\n\n\n
6. GUI \uc0dd\uc131: \ucf54\ub4dc\ub294 \uc8fc\uc694 Tkinter \ucc3d\uc744 \uc0dd\uc131\ud558\uace0 \uc785\ub825 \uc694\uc18c(\ub808\uc774\ube14, \uc785\ub825 \uc0c1\uc790) \ubc0f “\uac80\uc0c9 \uacb0\uacfc \uc2a4\ud06c\ub798\ud551” \ubc84\ud2bc\uc744 \uc124\uc815\ud569\ub2c8\ub2e4.<\/p>\n\n\n\n
– \uc0ac\uc6a9\uc790\uac00 \ubc84\ud2bc\uc744 \ud074\ub9ad\ud558\uba74 `scrape_and_save()` \ud568\uc218\uac00 \uc2e4\ud589\ub429\ub2c8\ub2e4.<\/p>\n\n\n\n
\uc804\ubc18\uc801\uc73c\ub85c \ucf54\ub4dc\ub294 \uc0ac\uc6a9\uc790\uac00 \uac80\uc0c9 \uc8fc\uc81c\ub97c \uc785\ub825\ud558\uace0 \ubc84\ud2bc\uc744 \ud074\ub9ad\ud558\uba74 Naver\uc5d0\uc11c \uc8fc\uc81c\uc640 \uad00\ub828\ub41c \uac80\uc0c9 \uacb0\uacfc\ub97c \uc2a4\ud06c\ub798\ud551\ud558\uc5ec \uacb0\uacfc\ub97c \uc5d1\uc140 \ud30c\uc77c\ub85c \uc800\uc7a5\ud558\uace0, Tkinter\ub97c \uc0ac\uc6a9\ud558\uc5ec \uc5d1\uc140 \ud30c\uc77c\uc758 \ub0b4\uc6a9\uc744 \ubcc4\ub3c4 \ucc3d\uc5d0 \ud45c\uc2dc\ud560 \uc218 \uc788\ub294 \uae30\ub2a5\uc744 \uc81c\uacf5\ud569\ub2c8\ub2e4.<\/p>\n\n\n\n