Update scraper.ts — Write improved function for web scraping the UD website

This commit is contained in:
Amelia 2022-06-04 17:07:51 +01:00
parent 8c82661bac
commit 6a896efaef
1 changed files with 127 additions and 0 deletions

127
src/scraper.ts Normal file
View File

@ -0,0 +1,127 @@
import axios from "axios";
import * as cheerio from "cheerio";
// TODO: Move promise handling function to util file
/**
 * Util function that awaits a Promise and reports the outcome as a tuple
 * instead of throwing, so callers can branch on success without try/catch.
 *
 * The payload is typed `any` because the helper is shared by callers that
 * immediately read fields off the fulfilled value; narrow it at the call site.
 *
 * @param promise The Promise to be handled
 * @returns `[true, value]` when the Promise fulfils, or `[false, error]`
 * when it rejects
 */
async function handlePromise(promise: Promise<any>): Promise<[boolean, any]> {
  try {
    const data = await promise;
    return [true, data];
  } catch (err) {
    // Rejections are converted into a value; this function never rejects.
    return [false, err];
  }
}
// TODO:
// Rewrite in organized manner
// Retrieve author
// Retrieve ratings
// Retrieve date
// Retrieve example(s)
/** Vote counts attached to a single definition. */
type rating = {
upvotes: number;
downvotes: number;
};
/** The contributor of a definition. */
type author = {
// Trimmed text of the contributor link.
name: string;
// Value of the contributor link's `href` attribute, stored as scraped.
url: string;
};
/**
 * One scraped definition entry.
 * NOTE(review): the alias name is misspelled ("defintion"); renaming it would
 * require updating every use in this file.
 */
type defintion = {
// Text of the entry's meaning element.
meaning: string;
// Text of the entry's example element.
example: string;
rating: rating;
author: author;
};
/**
 * Gets the definitions for a term by scraping its Urban Dictionary page.
 *
 * The term is URL-encoded before being placed in the query string, so terms
 * containing spaces, `&`, `#`, etc. are looked up as written.
 *
 * @param term The term to be defined
 * @returns The definitions found on the page. The list is empty when the
 * request fails (best-effort: request errors are not propagated) or when no
 * definition is found. Errors thrown while parsing the page reject the
 * returned Promise.
 */
async function getDefinitions(term: string): Promise<defintion[]> {
  // TODO: Accept params that limit/filter the definition
  //  i.e: Web scrape from page N
  //       Only web scrape N results
  const [resolved, data] = await handlePromise(
    axios.get(
      `https://www.urbandictionary.com/define.php?term=${encodeURIComponent(
        term
      )}`
    )
  );

  const defintions: defintion[] = [];
  if (!resolved) {
    // Keep the original best-effort contract: a failed request yields [].
    return defintions;
  }

  const $ = cheerio.load(data.data);

  $(".p-5").each((_index, element) => {
    const container = $(element);

    // ".p-5" also matches containers that are not definitions; only keep
    // those that actually hold a meaning element.
    // NOTE(review): confirm against the live markup that every real
    // definition carries ".meaning.mb-4".
    const meaningElement = container.find(".meaning.mb-4").first();
    if (meaningElement.length === 0) {
      return;
    }

    const exampleElement = container.find(".example.italic.mb-4").first();
    const authorAnchor = container.find(".contributor").find("a").first();

    // TODO: Finish ratings
    // const upvotes = $(this).find(".thumbs[data-direction=up]").find("span");
    defintions.push({
      meaning: meaningElement.text(),
      example: exampleElement.text(),
      // Ratings are not scraped yet, so both counters are placeholders.
      rating: {
        upvotes: 0,
        downvotes: 0,
      },
      author: {
        name: authorAnchor.text().trim(),
        // The contributor link may be missing; fall back to an empty string
        // rather than leaking `undefined` through a `string` field.
        url: authorAnchor.attr("href") ?? "",
      },
    });
  });

  return defintions;
}
// NOTE(review): this looks like a leftover debug call. It runs whenever this
// module is evaluated (i.e. on import), triggers a request for "yogurt", and
// discards the returned Promise. Confirm it is unneeded and remove it.
getDefinitions("yogurt");
/**
 * Fetches a page and returns the text of its first definition.
 *
 * For now, only the first/top meaning (the first `.meaning.mb-4` element) is
 * scraped.
 *
 * @param url The URL of the page to scrape
 * @returns The trimmed text of the first meaning element; an empty string
 * when the page contains no such element
 * @throws The underlying request error (after logging it) when the page
 * cannot be fetched
 */
export default async function scrapeData(url: string): Promise<string> {
  const [success, data] = await handlePromise(axios.get(url));

  if (!success) {
    // On failure `data` holds the rejection reason from the request.
    console.error(data);
    throw data;
  }

  // Being a plain async function (rather than an async Promise executor),
  // any error thrown while parsing rejects the returned Promise instead of
  // leaving it pending forever.
  const $ = cheerio.load(data.data);
  return $(".meaning.mb-4").first().text().trim();
}