/**
 * Fetches a recipe page and scrapes the text of its ingredient list.
 *
 * The page is searched for the first `<ul>` whose class attribute contains
 * the word "ingredients"; every `<li>` inside it becomes one entry. Markup is
 * stripped, HTML entities are decoded through `decodeHtmlEntities`, and
 * whitespace is collapsed. Entries that end up empty are discarded so they do
 * not produce stray separators when the caller joins the list.
 *
 * This is regex-based scraping, not real HTML parsing: the lazy match stops at
 * the first closing `</ul>`, so a nested list would truncate the section.
 *
 * @param {string} url Address of the recipe page.
 * @returns {string[]} Cleaned ingredient lines; an empty array when the URL is
 *     blank, the fetch or parse fails, or no ingredient list is found.
 */
function extractIngredients(url) {
  // A blank cell (or a non-text value) can never be fetched; skip the request
  // instead of letting it fail inside the try block.
  if (typeof url !== 'string' || url.trim() === '') {
    Logger.log('Skipping blank or non-text URL: ' + url);
    return [];
  }

  try {
    Logger.log('Fetching URL: ' + url);
    const html = UrlFetchApp.fetch(url).getContentText();
    // Only the first 500 characters are logged to keep the log readable.
    Logger.log('HTML Content: ' + html.substring(0, 500));

    // Accept either quote style and any letter case in the markup.
    const sectionMatch = html.match(
      /<ul[^>]*class=["'][^"']*ingredients[^"']*["'][^>]*>([\s\S]*?)<\/ul>/i
    );
    if (!sectionMatch) {
      Logger.log('No ingredient section found for URL: ' + url);
      Logger.log('Extracted Ingredients: ');
      return [];
    }

    const sectionHtml = sectionMatch[1];
    Logger.log('Ingredient Section: ' + sectionHtml.substring(0, 500));

    const itemBlocks = sectionHtml.match(/<li[^>]*>([\s\S]*?)<\/li>/gi) || [];
    const ingredients = itemBlocks
      .map(function (itemHtml) {
        // Strip tags first so that decoded "<" / ">" characters are not
        // mistaken for markup afterwards.
        const withoutTags = itemHtml.replace(/<\/?[^>]+(>|$)/g, '');
        return decodeHtmlEntities(withoutTags).replace(/\s+/g, ' ').trim();
      })
      .filter(function (line) {
        return line !== '';
      });

    Logger.log('Extracted Ingredients: ' + ingredients);
    return ingredients;
  } catch (e) {
    // Best effort: one unreachable page must not abort a whole sheet run.
    Logger.log('Error fetching or parsing URL: ' + url);
    Logger.log(e.toString());
    return [];
  }
}
/**
 * Decodes HTML character references in a text fragment.
 *
 * Handles decimal (`&#189;`), hexadecimal (`&#xBD;`) and a fixed set of named
 * (`&frac12;`) references. A few code points are deliberately normalised
 * instead of decoded literally: typographic quotes become ASCII quotes,
 * dashes become "-", the ellipsis becomes "...", the non-breaking space
 * becomes a plain space, and the zero-width space and the square/checkbox
 * glyphs (U+25A0, U+25A2) are removed.
 *
 * Numeric references that are malformed or name an invalid code point are
 * removed. Named references that are not in the table are left untouched.
 * Replacement text is not re-scanned, so `&amp;lt;` yields `&lt;`.
 *
 * @param {string} text Text that may contain HTML character references.
 * @returns {string} The decoded text; an empty string for null/undefined.
 */
function decodeHtmlEntities(text) {
  if (text == null) {
    return '';
  }

  // Named references mapped to their Unicode code points.
  const namedCodePoints = {
    quot: 34, amp: 38, apos: 39, lt: 60, gt: 62,
    nbsp: 160, iexcl: 161, cent: 162, pound: 163, curren: 164, yen: 165,
    brvbar: 166, sect: 167, uml: 168, copy: 169, ordf: 170, laquo: 171,
    not: 172, reg: 174, macr: 175, deg: 176, plusmn: 177, sup2: 178,
    sup3: 179, acute: 180, micro: 181, para: 182, middot: 183, cedil: 184,
    sup1: 185, ordm: 186, raquo: 187, frac14: 188, frac12: 189, frac34: 190,
    iquest: 191, times: 215, divide: 247,
    ndash: 8211, mdash: 8212, lsquo: 8216, rsquo: 8217, ldquo: 8220,
    rdquo: 8221, hellip: 8230
  };

  // Code points that are replaced by plain-text equivalents (or dropped)
  // rather than decoded to the character itself.
  const replacements = {
    160: ' ',    // non-breaking space
    8203: '',    // zero-width space
    8211: '-',   // en dash
    8212: '-',   // em dash
    8216: "'",   // left single quotation mark
    8217: "'",   // right single quotation mark
    8220: '"',   // left double quotation mark
    8221: '"',   // right double quotation mark
    8230: '...', // horizontal ellipsis
    9632: '',    // black square
    9634: ''     // white square with rounded corners (checkbox glyph)
  };

  const hasOwn = function (table, key) {
    return Object.prototype.hasOwnProperty.call(table, key);
  };

  const fromCodePoint = function (codePoint) {
    if (hasOwn(replacements, codePoint)) {
      return replacements[codePoint];
    }
    const isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
    const inRange = codePoint > 0 && codePoint <= 0x10FFFF;
    return inRange && !isSurrogate ? String.fromCodePoint(codePoint) : '';
  };

  return String(text).replace(/&(#?[0-9a-zA-Z]+);/g, function (match, body) {
    if (body.charAt(0) !== '#') {
      // Unknown names are kept verbatim so that ordinary text such as
      // "this &that; other" is not silently eaten.
      return hasOwn(namedCodePoints, body) ? fromCodePoint(namedCodePoints[body]) : match;
    }
    if (/^#[xX][0-9a-fA-F]+$/.test(body)) {
      return fromCodePoint(parseInt(body.slice(2), 16));
    }
    if (/^#[0-9]+$/.test(body)) {
      return fromCodePoint(parseInt(body.slice(1), 10));
    }
    // "&#" followed by something that is neither decimal nor hex.
    return '';
  });
}
/**
 * Reorders ingredient lines so that meat items come first, produce items
 * second and everything else last.
 *
 * Classification is a case-insensitive substring test against two fixed
 * keyword lists; the meat list is checked before the produce list, so a line
 * mentioning both counts as meat. Relative order inside each group is kept.
 *
 * @param {string[]} ingredients Ingredient lines to reorder.
 * @returns {string[]} A new array: meat lines, then produce lines, then the rest.
 */
function categorizeIngredients(ingredients) {
  const meatTerms = ["chicken", "beef", "pork", "sausage", "bacon", "ham", "turkey", "lamb", "fish", "shrimp"];
  const produceTerms = ["onion", "garlic", "pepper", "tomato", "lettuce", "spinach", "carrot", "celery", "potato", "avocado"];

  // True when the (already lower-cased) text contains any of the terms.
  const mentionsAny = function (text, terms) {
    return terms.some(function (term) {
      return text.includes(term);
    });
  };

  const meatLines = [];
  const produceLines = [];
  const otherLines = [];

  ingredients.forEach(function (line) {
    const lowered = line.toLowerCase();
    if (mentionsAny(lowered, meatTerms)) {
      meatLines.push(line);
      return;
    }
    if (mentionsAny(lowered, produceTerms)) {
      produceLines.push(line);
      return;
    }
    otherLines.push(line);
  });

  return meatLines.concat(produceLines).concat(otherLines);
}
/**
 * Picks a meat label for a recipe from its ingredient lines.
 *
 * The keyword list is scanned in its declared order and the first keyword
 * that occurs (case-insensitively, as a substring) in any ingredient wins,
 * regardless of where that ingredient sits in the list.
 *
 * @param {string[]} ingredients Ingredient lines of one recipe.
 * @returns {string} The matching meat keyword, or "other" when none matches.
 */
function categorizeRecipe(ingredients) {
  const meatTerms = ["chicken", "beef", "pork", "sausage", "bacon", "ham", "turkey", "lamb", "fish", "shrimp"];

  const firstHit = meatTerms.find(function (term) {
    return ingredients.some(function (line) {
      return line.toLowerCase().includes(term);
    });
  });

  if (firstHit === undefined) {
    return "other";
  }
  return firstHit;
}
/**
 * Adds the "Recipe Organizer" menu to the spreadsheet UI.
 *
 * The menu has two entries, each bound by name to a function in this script:
 * "Extract Ingredients" -> extractAllIngredients and
 * "Arrange Recipes" -> arrangeRecipes.
 */
function onOpen() {
  const spreadsheetUi = SpreadsheetApp.getUi();

  // Each builder call returns the menu to continue from, so the steps are
  // threaded through explicitly instead of being chained.
  const emptyMenu = spreadsheetUi.createMenu('Recipe Organizer');
  const withExtract = emptyMenu.addItem('Extract Ingredients', 'extractAllIngredients');
  const withArrange = withExtract.addItem('Arrange Recipes', 'arrangeRecipes');
  withArrange.addToUi();
}
/**
 * Fills the second column of the active sheet with the ingredients scraped
 * from the URL in the first column of each row.
 *
 * The first row is treated as a header and skipped. For every other row the
 * URL is passed to `extractIngredients`, the result is reordered by
 * `categorizeIngredients`, joined with ", " and written to column 2 of that
 * row. Cells are written one row at a time as each page is processed.
 */
function extractAllIngredients() {
  const ingredientsColumn = 2;
  const activeSheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
  const rows = activeSheet.getDataRange().getValues();

  // Drop the header row; `offset` 0 therefore corresponds to sheet row 2.
  rows.slice(1).forEach(function (row, offset) {
    const sheetRow = offset + 2;
    const recipeUrl = row[0]; // URLs live in the first column
    Logger.log('Processing row ' + sheetRow + ' with URL: ' + recipeUrl);

    const ordered = categorizeIngredients(extractIngredients(recipeUrl));
    activeSheet.getRange(sheetRow, ingredientsColumn).setValue(ordered.join(", "));
  });
}
/**
 * Sorts the recipe rows of the active sheet in place.
 *
 * The first row is kept as the header. Every other row is labelled with the
 * meat type returned by `categorizeRecipe` for its ingredient cell (second
 * column, items separated by ", "). Rows are ordered by that label
 * alphabetically and, within one label, by ascending ingredient count. The
 * reordered rows are then written back over the same range as plain values.
 *
 * The ingredient cell is coerced to text before splitting, so numeric, date
 * or missing cells no longer abort the run, and a blank cell counts as zero
 * ingredients rather than one empty ingredient.
 *
 * NOTE(review): an ingredient that itself contains ", " is counted as several
 * items because the cell is split on that separator.
 */
function arrangeRecipes() {
  const sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
  const data = sheet.getDataRange().getValues();

  const recipes = [];
  for (let i = 1; i < data.length; i++) {
    const cell = data[i][1];
    const cellText = cell == null ? '' : String(cell);
    const ingredients = cellText.trim() === '' ? [] : cellText.split(", ");
    recipes.push({
      values: data[i],
      meatType: categorizeRecipe(ingredients),
      ingredientCount: ingredients.length
    });
  }

  recipes.sort(function (a, b) {
    return a.meatType.localeCompare(b.meatType) || a.ingredientCount - b.ingredientCount;
  });

  // Header first, then the rows in their new order.
  const newOrder = [data[0]].concat(recipes.map(function (recipe) {
    return recipe.values;
  }));

  sheet.getRange(1, 1, newOrder.length, newOrder[0].length).setValues(newOrder);
}