<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://www.r512.com/index.php?action=history&amp;feed=atom&amp;title=Python_Web_Scraping</id>
	<title>Python Web Scraping - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://www.r512.com/index.php?action=history&amp;feed=atom&amp;title=Python_Web_Scraping"/>
	<link rel="alternate" type="text/html" href="https://www.r512.com/index.php?title=Python_Web_Scraping&amp;action=history"/>
	<updated>2026-05-01T03:47:14Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.37.1</generator>
	<entry>
		<id>https://www.r512.com/index.php?title=Python_Web_Scraping&amp;diff=1958&amp;oldid=prev</id>
		<title>Bacchas at 16:20, 1 September 2020</title>
		<link rel="alternate" type="text/html" href="https://www.r512.com/index.php?title=Python_Web_Scraping&amp;diff=1958&amp;oldid=prev"/>
		<updated>2020-09-01T16:20:50Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 16:20, 1 September 2020&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l92&quot;&gt;Line 92:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 92:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;/pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;/pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;==[[#top|Back To Top]] - [[Python|Category]]==&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;==[[#top|Back To Top]] - [[Python|Category]]==&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;[[Category:Python]]&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Bacchas</name></author>
	</entry>
	<entry>
		<id>https://www.r512.com/index.php?title=Python_Web_Scraping&amp;diff=1894&amp;oldid=prev</id>
		<title>Bacchas at 13:39, 13 March 2020</title>
		<link rel="alternate" type="text/html" href="https://www.r512.com/index.php?title=Python_Web_Scraping&amp;diff=1894&amp;oldid=prev"/>
		<updated>2020-03-13T13:39:59Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 13:39, 13 March 2020&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l91&quot;&gt;Line 91:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 91:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br/&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br/&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;/pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;/pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;==[[#top|Back To Top]] - [[Python|Category]]==&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Bacchas</name></author>
	</entry>
	<entry>
		<id>https://www.r512.com/index.php?title=Python_Web_Scraping&amp;diff=1878&amp;oldid=prev</id>
		<title>Bacchas: Created page with &quot;==Web Scraping== &lt;pre&gt; # pip install bs4, requests, pandas # install them one at a time import requests from bs4 import BeautifulSoup import pandas as pd nj = &#039;https://forecas...&quot;</title>
		<link rel="alternate" type="text/html" href="https://www.r512.com/index.php?title=Python_Web_Scraping&amp;diff=1878&amp;oldid=prev"/>
		<updated>2020-01-03T20:21:23Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot;==Web Scraping== &amp;lt;pre&amp;gt; # pip install bs4, requests, pandas # install them one at a time import requests from bs4 import BeautifulSoup import pandas as pd nj = &amp;#039;https://forecas...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;==Web Scraping==&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
# pip install bs4, requests, pandas&lt;br /&gt;
# install them one at a time&lt;br /&gt;
import requests&lt;br /&gt;
from bs4 import BeautifulSoup&lt;br /&gt;
import pandas as pd&lt;br /&gt;
nj = &amp;#039;https://forecast.weather.gov/MapClick.php?lat=40.89165000000003&amp;amp;lon=-74.04688499999997#.XgvA5xdKhUQ&amp;#039;&lt;br /&gt;
alaska = &amp;#039;https://forecast.weather.gov/MapClick.php?lat=64.0003&amp;amp;lon=-150.0003#.XgvO1BdKhUQ&amp;#039;&lt;br /&gt;
page = requests.get(alaska)&lt;br /&gt;
soup = BeautifulSoup(page.content, &amp;#039;html.parser&amp;#039;)&lt;br /&gt;
# print(soup.find_all(&amp;#039;a&amp;#039;)) # find all a tags&lt;br /&gt;
week = soup.find(id=&amp;#039;seven-day-forecast-body&amp;#039;)&lt;br /&gt;
items = (week.find_all(class_=&amp;#039;tombstone-container&amp;#039;))&lt;br /&gt;
#print(items[0])&lt;br /&gt;
&lt;br /&gt;
item1 = items[0].find(class_=&amp;#039;period-name&amp;#039;).get_text()&lt;br /&gt;
item2 = items[0].find(class_=&amp;#039;short-desc&amp;#039;).get_text()&lt;br /&gt;
item3 = items[0].find(class_=&amp;#039;temp&amp;#039;).get_text()&lt;br /&gt;
#print(&amp;#039;Weather: &amp;#039; + item1 + &amp;#039; - &amp;#039; + item2 + &amp;#039;  - &amp;#039; + item3)&lt;br /&gt;
&lt;br /&gt;
period_names = [item.find(class_=&amp;#039;period-name&amp;#039;).get_text() for item in items]&lt;br /&gt;
short_desc = [item.find(class_=&amp;#039;short-desc&amp;#039;).get_text() for item in items]&lt;br /&gt;
temp = [item.find(class_=&amp;#039;temp&amp;#039;).get_text() for item in items]&lt;br /&gt;
#print(period_names)&lt;br /&gt;
#print(short_desc)&lt;br /&gt;
#print(temp)&lt;br /&gt;
&lt;br /&gt;
weather_stuff = pd.DataFrame(&lt;br /&gt;
  {&lt;br /&gt;
    &amp;#039;period&amp;#039;: period_names,&lt;br /&gt;
    &amp;#039;short_description&amp;#039;: short_desc,&lt;br /&gt;
    &amp;#039;temperatures&amp;#039;: temp&lt;br /&gt;
  }&lt;br /&gt;
)&lt;br /&gt;
&lt;br /&gt;
print(weather_stuff)&lt;br /&gt;
weather_stuff.to_csv(&amp;#039;alaska.csv&amp;#039;)&lt;br /&gt;
weather_stuff.to_html(&amp;#039;alaska.html&amp;#039;)&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
# pip install bs4, requests, pandas, lxml&lt;br /&gt;
# install them one at a time&lt;br /&gt;
import requests&lt;br /&gt;
from bs4 import BeautifulSoup&lt;br /&gt;
import pandas as pd&lt;br /&gt;
import lxml&lt;br /&gt;
import csv&lt;br /&gt;
import sys&lt;br /&gt;
&lt;br /&gt;
reload(sys)&lt;br /&gt;
sys.setdefaultencoding(&amp;#039;utf8&amp;#039;)&lt;br /&gt;
&lt;br /&gt;
source = requests.get(&amp;#039;http://coreyms.com&amp;#039;).text&lt;br /&gt;
&lt;br /&gt;
soup = BeautifulSoup(source, &amp;#039;lxml&amp;#039;)&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
csv_file = open(&amp;#039;web_scrape.csv&amp;#039;, &amp;#039;w&amp;#039;)&lt;br /&gt;
csv_writer = csv.writer(csv_file)&lt;br /&gt;
csv_writer.writerow([&amp;#039;headline&amp;#039;, &amp;#039;summary&amp;#039;, &amp;#039;vidsource&amp;#039;])&lt;br /&gt;
&lt;br /&gt;
for article in soup.find_all(&amp;#039;article&amp;#039;):&lt;br /&gt;
    headline = article.h2.a.text&lt;br /&gt;
    summary = article.find(&amp;#039;div&amp;#039;, class_=&amp;#039;entry-content&amp;#039;).p.text&lt;br /&gt;
    # get the value of the iframe&amp;#039;s src attribute (the embedded video URL)&lt;br /&gt;
    try:&lt;br /&gt;
        vidsource = article.find(&amp;#039;iframe&amp;#039;)[&amp;#039;src&amp;#039;]&lt;br /&gt;
    except TypeError:&lt;br /&gt;
        vidsource= &amp;#039;* No video&amp;#039;&lt;br /&gt;
&lt;br /&gt;
    #parsing out part of a string&lt;br /&gt;
    # eg: http://youtube.com/embed/12345-7o?version=3&amp;amp;rel=1&amp;amp;fs&lt;br /&gt;
    #vid_id = vidsource.split(&amp;#039;/&amp;#039;)[4]&lt;br /&gt;
    # this splits the URL on &amp;#039;/&amp;#039;; the element at index 4 is the YouTube video id&lt;br /&gt;
    # vid_id = vid_id.split(&amp;#039;?&amp;#039;)[0]&lt;br /&gt;
    # put the link together; the &amp;quot;f&amp;quot; prefix makes it a formatted string (f-string). The YouTube URL prefix is always the same&lt;br /&gt;
    # yt_link = f&amp;#039;https://youtube.com/watch?v={vid_id}&amp;#039;&lt;br /&gt;
    #print(article.prettify())&lt;br /&gt;
    print(headline)&lt;br /&gt;
    print(summary)&lt;br /&gt;
    print(vidsource)&lt;br /&gt;
    print(&amp;#039;--------------------------&amp;#039;)&lt;br /&gt;
    csv_writer.writerow([headline, summary, vidsource])&lt;br /&gt;
csv_file.close()&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;/div&gt;</summary>
		<author><name>Bacchas</name></author>
	</entry>
</feed>