Update scraper.ts — Write improved function for web scraping the UD website
This commit is contained in:
parent
8c82661bac
commit
6a896efaef
|
@ -0,0 +1,127 @@
|
|||
import axios from "axios";
|
||||
import * as cheerio from "cheerio";
|
||||
|
||||
// TODO: Move promise handling function to util file
|
||||
/**
 * Awaits a promise and reports its outcome as a tuple instead of throwing.
 *
 * The first tuple element acts as a discriminant: when it is `true` the
 * second element is the fulfilled value, when it is `false` the second
 * element is the rejection reason.
 *
 * @param promise The promise to settle
 * @returns `[true, value]` if the promise fulfilled, `[false, reason]` if it
 *          rejected
 */
async function handlePromise<T>(
  promise: Promise<T>
): Promise<[true, T] | [false, unknown]> {
  try {
    return [true, await promise];
  } catch (err) {
    // Rejection reasons can be any value, so they are surfaced as `unknown`.
    return [false, err];
  }
}
|
||||
|
||||
// TODO:
|
||||
// Rewrite in organized manner
|
||||
// Retrieve author
|
||||
// Retrieve ratings
|
||||
// Retrieve date
|
||||
// Retrieve example(s)
|
||||
|
||||
/** Vote counts attached to a definition. */
type Rating = {
  upvotes: number;
  downvotes: number;
};

/** The contributor of a definition. */
type Author = {
  /** Display name taken from the contributor link text. */
  name: string;
  /** Value of the contributor link's `href` attribute. */
  url: string;
};

/** A single scraped definition entry. */
type Definition = {
  meaning: string;
  example: string;
  rating: Rating;
  author: Author;
};

/**
 * Misspelled legacy name kept so existing references keep compiling.
 *
 * @deprecated Use {@link Definition} instead.
 */
type defintion = Definition;
|
||||
|
||||
/**
 * Gets the definitions for a term by scraping the Urban Dictionary
 * `define.php` page for that term.
 *
 * This is best-effort: if the request fails, the error is logged and an empty
 * list is returned rather than rejecting.
 *
 * @param term The term to be defined (URL-encoded before being sent)
 * @returns The definitions found on the page. Ratings are placeholders
 *          (always 0) until vote scraping is implemented.
 */
async function getDefinitions(term: string): Promise<defintion[]> {
  // TODO: Accept params that limit/filter the definition
  //  i.e: Web scrape from page N
  //       Only web scrape N results

  // Encode the term so spaces, `&`, `#`, etc. cannot corrupt the query string.
  const url = `https://www.urbandictionary.com/define.php?term=${encodeURIComponent(
    term
  )}`;

  const [resolved, response] = await handlePromise(axios.get<string>(url));

  if (!resolved) {
    // Keep the "empty result on failure" contract, but do not hide the cause.
    console.error(response);
    return [];
  }

  const $ = cheerio.load(response.data);
  const definitions: defintion[] = [];

  // `.p-5` also matches containers that are not definitions, so any container
  // without a meaning element is skipped below.
  $(".p-5").each((_, element) => {
    const container = $(element);

    const meaningNode = container.find(".meaning.mb-4").first();
    if (meaningNode.length === 0) {
      return;
    }

    const example = container.find(".example.italic.mb-4").first().text();

    const authorAnchor = container.find(".contributor").find("a").first();

    // TODO: Finish ratings
    // const upvotes = container.find(".thumbs[data-direction=up]").find("span");

    definitions.push({
      meaning: meaningNode.text(),
      example,
      rating: {
        upvotes: 0,
        downvotes: 0,
      },
      author: {
        name: authorAnchor.text().trim(),
        // A missing href becomes an empty string so `url` is always a string.
        url: authorAnchor.attr("href") ?? "",
      },
    });
  });

  return definitions;
}
|
||||
|
||||
// Runs as soon as this module is loaded: starts a scrape for "yogurt" and
// discards the result. `void` marks the promise as intentionally un-awaited.
// NOTE(review): looks like a leftover smoke test — confirm whether it should
// stay, since it fires a network request on every import.
void getDefinitions("yogurt");
|
||||
|
||||
/**
 * Fetches a page and extracts the text of its first `.meaning.mb-4` element.
 *
 * @param url Address of the page to scrape
 * @returns The trimmed text of the first/top meaning on the page
 * @throws Rejects with the underlying request error (after logging it) when
 *         the page cannot be fetched
 */
export default async function scrapeData(url: string): Promise<string> {
  const [success, response] = await handlePromise(axios.get<string>(url));

  if (!success) {
    console.error(response);

    // Re-throw the original reason so callers see the same rejection value.
    throw response;
  }

  const $ = cheerio.load(response.data);

  // For now, only scraping the first/top meaning
  return $(".meaning.mb-4").first().text().trim();
}
|
Loading…
Reference in New Issue