https://data36.com/web-scraping-tutorial-episode-1-scraping-a-webpage-with-bash/
# Install html2text (used below to render the downloaded HTML as plain text).
sudo apt-get install html2text

# Fetch the talk's transcript page, convert it to plain text, trim it down to
# the transcript section, and save the result in proto_text.csv.
#
# Stage by stage:
#   curl -fsSL      fail on HTTP errors instead of passing an error page down
#                   the pipeline (-f), hide the progress meter but still print
#                   error messages (-sS), and follow redirects (-L).
#   html2text       convert the HTML read from stdin to plain text.
#   sed (1st)       print from the first line matching "Details About the talk"
#                   through the end of the input.
#   sed (2nd)       print lines until one matches "Programs &. initiatives"
#                   (the "." matches any single character), then quit without
#                   printing the matching line.
#   head -n-1       drop the last remaining line (negative count: GNU head).
#   tail -n+2       drop the first remaining line, i.e. the
#                   "Details About the talk" line itself.
curl -fsSL 'https://www.ted.com/talks/sir_ken_robinson_do_schools_kill_creativity/transcript' \
  | html2text \
  | sed -n '/Details About the talk/,$p' \
  | sed -n '/Programs &. initiatives/q;p' \
  | head -n-1 \
  | tail -n+2 \
  > proto_text.csv
No comments:
Post a Comment