Monitorear Google News
Configura una pipeline de alertas de noticias por marca o palabra clave que recopila y deduplica artículos de Google News en tiempo real con la API de Autom.
Descripción general
Este tutorial construye una pipeline de monitoreo de noticias: consulta Google News por nombre de marca, producto o tema y recopila todos los títulos de artículos, fuentes y fechas de publicación. Ejecútalo con un cron job para detectar cobertura de prensa en cuanto aparezca.
Prerrequisitos
- Una clave de API de Autom — consigue una en app.autom.dev
- Instala las dependencias para tu lenguaje:
pip install requests
Sin dependencias adicionales — usa la API nativa fetch (Node 18+).
Extensión curl habilitada (activa por defecto).
Sin dependencias adicionales — usa net/http (Go 1.18+).
Sin dependencias adicionales — usa java.net.http (Java 11+).
Sin dependencias adicionales — usa System.Net.Http (.NET 6+).
# Cargo.toml
[dependencies]
reqwest = { version = "0.12", features = ["json"] }
tokio = { version = "1", features = ["full"] }
serde_json = "1"
Pasos
Obtén los últimos artículos de noticias
Llama a GET /v1/google/news con la palabra clave que quieres monitorear.
import requests

API_KEY = "YOUR_API_KEY"

# Fetch the latest Google News results for one keyword from the Autom API.
# q = keyword to monitor, gl = country code, hl = interface language.
response = requests.get(
    "https://api.autom.dev/v1/google/news",
    headers={"x-api-key": API_KEY},  # API authentication header
    params={"q": "OpenAI", "gl": "es", "hl": "es"},
    timeout=30,  # without a timeout, requests can block forever on a slow server
)
data = response.json()const API_KEY = "YOUR_API_KEY";
// Query-string parameters: q = keyword, gl = country code, hl = interface language.
const params = new URLSearchParams({ q: "OpenAI", gl: "es", hl: "es" });
// Call the Autom Google News endpoint; authentication goes in the x-api-key header.
const response = await fetch(`https://api.autom.dev/v1/google/news?${params}`, {
headers: { "x-api-key": API_KEY },
});
const data = await response.json();<?php
$apiKey = "YOUR_API_KEY";
// q = keyword to monitor, gl = country code, hl = interface language.
$params = http_build_query(["q" => "OpenAI", "gl" => "es", "hl" => "es"]);
$ch = curl_init("https://api.autom.dev/v1/google/news?{$params}");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // return the body instead of printing it
curl_setopt($ch, CURLOPT_HTTPHEADER, ["x-api-key: {$apiKey}"]); // API authentication header
$data = json_decode(curl_exec($ch), true); // decode the JSON body into an associative array
curl_close($ch);package main
import (
"encoding/json"
"io"
"net/http"
"net/url"
)
// Fetch the latest Google News results for one keyword from the Autom API.
func main() {
// q = keyword, gl = country code, hl = interface language.
params := url.Values{"q": {"OpenAI"}, "gl": {"es"}, "hl": {"es"}}
req, _ := http.NewRequest("GET", "https://api.autom.dev/v1/google/news?"+params.Encode(), nil)
req.Header.Set("x-api-key", "YOUR_API_KEY") // API authentication header
resp, _ := http.DefaultClient.Do(req) // NOTE(review): error ignored — resp is nil on network failure, so the next line would panic
defer resp.Body.Close()
body, _ := io.ReadAll(resp.Body)
var data map[string]any
json.Unmarshal(body, &data) // articles live under the "organic_results" key
}import java.net.URI;
import java.net.http.*;
// Fetch the latest Google News results for one keyword from the Autom API.
var client = HttpClient.newHttpClient();
// q = keyword, gl = country code, hl = interface language.
var request = HttpRequest.newBuilder()
.uri(URI.create("https://api.autom.dev/v1/google/news?q=OpenAI&gl=es&hl=es"))
.header("x-api-key", "YOUR_API_KEY") // API authentication header
.GET().build();
var response = client.send(request, HttpResponse.BodyHandlers.ofString());
System.out.println(response.body());using System.Net.Http;
using var client = new HttpClient();
// All requests authenticate via the x-api-key header.
client.DefaultRequestHeaders.Add("x-api-key", "YOUR_API_KEY");
// q = keyword, gl = country code, hl = interface language.
var body = await client.GetStringAsync("https://api.autom.dev/v1/google/news?q=OpenAI&gl=es&hl=es");
Console.WriteLine(body);#[tokio::main]
// Fetch the latest Google News results for one keyword from the Autom API.
async fn main() -> Result<(), reqwest::Error> {
let data = reqwest::Client::new()
.get("https://api.autom.dev/v1/google/news")
.header("x-api-key", "YOUR_API_KEY") // API authentication header
// q = keyword, gl = country code, hl = interface language.
.query(&[("q", "OpenAI"), ("gl", "es"), ("hl", "es")])
.send().await?.json::<serde_json::Value>().await?;
println!("{:#?}", data); // debug-print the raw JSON response
Ok(())
}Extrae y muestra los artículos
Cada elemento en organic_results tiene title, link, source, date y snippet.
# Each organic_results entry carries title, link, source, date and snippet;
# a missing key falls back to an empty list so the loop is a no-op.
for article in data.get("organic_results", []):
print(f"[{article['date']}] {article['title']}")
print(f" Fuente : {article['source']}")
print(f" URL : {article['link']}\n")for (const article of data.organic_results ?? []) {
console.log(`[${article.date}] ${article.title}`); // publication date + headline
console.log(` Fuente : ${article.source}`); // publishing outlet
console.log(` URL : ${article.link}\n`); // canonical article link
}foreach ($data["organic_results"] ?? [] as $article) {
echo "[{$article['date']}] {$article['title']}\n"; // publication date + headline
echo " Fuente : {$article['source']}\n"; // publishing outlet
echo " URL : {$article['link']}\n\n"; // canonical article link
}results := data["organic_results"].([]any)
for _, r := range results {
a := r.(map[string]any) // NOTE(review): unchecked assertion — panics if an entry is not a JSON object
fmt.Printf("[%s] %s\n Fuente : %s\n URL : %s\n\n",
a["date"], a["title"], a["source"], a["link"])
}import org.json.*;
// Parse the response body and print each article's metadata.
var json = new JSONObject(response.body());
var results = json.getJSONArray("organic_results"); // NOTE(review): throws JSONException when the key is absent
for (int i = 0; i < results.length(); i++) {
var a = results.getJSONObject(i);
System.out.printf("[%s] %s%n Fuente : %s%n URL : %s%n%n",
a.getString("date"), a.getString("title"),
a.getString("source"), a.getString("link"));
}using System.Text.Json;
// Parse the JSON body and walk the organic_results array.
var json = JsonDocument.Parse(body);
var results = json.RootElement.GetProperty("organic_results").EnumerateArray();
foreach (var a in results)
{
Console.WriteLine($"[{a.GetProperty("date")}] {a.GetProperty("title")}");
Console.WriteLine($" Fuente : {a.GetProperty("source")}");
Console.WriteLine($" URL : {a.GetProperty("link")}\n");
}if let Some(articles) = data["organic_results"].as_array() {
for a in articles {
// Missing or non-string fields print as empty strings rather than panicking.
println!("[{}] {}", a["date"].as_str().unwrap_or(""), a["title"].as_str().unwrap_or(""));
println!(" Fuente : {}", a["source"].as_str().unwrap_or(""));
println!(" URL : {}\n", a["link"].as_str().unwrap_or(""));
}
}Construye una pipeline de monitoreo con deduplicación
Almacena las URLs de artículos ya vistos para que las ejecuciones repetidas no generen alertas duplicadas.
import json, requests
from pathlib import Path
API_KEY = "YOUR_API_KEY"
KEYWORDS = ["OpenAI", "Anthropic", "Mistral AI"]
SEEN_FILE = Path("seen_articles.json")
def load_seen() -> set:
return set(json.loads(SEEN_FILE.read_text())) if SEEN_FILE.exists() else set()
def save_seen(seen: set) -> None:
SEEN_FILE.write_text(json.dumps(list(seen)))
def fetch_news(query: str) -> list:
r = requests.get("https://api.autom.dev/v1/google/news",
headers={"x-api-key": API_KEY}, params={"q": query, "gl": "es", "hl": "es"})
return r.json().get("organic_results", [])
seen = load_seen()
new_articles = []
for keyword in KEYWORDS:
for article in fetch_news(keyword):
if article["link"] not in seen:
seen.add(article["link"])
new_articles.append({**article, "keyword": keyword})
save_seen(seen)
print(f"Encontrados {len(new_articles)} artículo(s) nuevo(s):")
for a in new_articles:
print(f" [{a['keyword']}] {a['title']} — {a['source']}")import { readFileSync, writeFileSync, existsSync } from "fs";
const API_KEY = "YOUR_API_KEY";
const KEYWORDS = ["OpenAI", "Anthropic", "Mistral AI"];
// File that persists every article URL already alerted on between runs.
const SEEN_FILE = "seen_articles.json";
// Load the previously-seen links (empty set on the first run).
function loadSeen(): Set<string> {
return existsSync(SEEN_FILE)
? new Set(JSON.parse(readFileSync(SEEN_FILE, "utf-8")))
: new Set();
}
// Fetch Google News results for one keyword from the Autom API;
// returns the organic_results array, or [] when the key is absent.
async function fetchNews(query: string): Promise<any[]> {
const params = new URLSearchParams({ q: query, gl: "es", hl: "es" });
const res = await fetch(`https://api.autom.dev/v1/google/news?${params}`, {
headers: { "x-api-key": API_KEY },
});
return (await res.json()).organic_results ?? [];
}
const seen = loadSeen();
const newArticles: any[] = [];
for (const keyword of KEYWORDS) {
for (const article of await fetchNews(keyword)) {
// Only links not seen in any previous run count as new.
if (!seen.has(article.link)) {
seen.add(article.link);
newArticles.push({ ...article, keyword });
}
}
}
// Persist the updated seen set for the next scheduled run.
writeFileSync(SEEN_FILE, JSON.stringify([...seen]));
console.log(`Encontrados ${newArticles.length} artículo(s) nuevo(s):`);
for (const a of newArticles) console.log(` [${a.keyword}] ${a.title} — ${a.source}`);<?php
$apiKey = "YOUR_API_KEY";
$keywords = ["OpenAI", "Anthropic", "Mistral AI"];
// File that persists every article URL already alerted on between runs.
$seenFile = "seen_articles.json";
// array_flip turns the stored URL list into a hash map for O(1) membership tests.
$seen = file_exists($seenFile) ? array_flip(json_decode(file_get_contents($seenFile), true)) : [];
$newArticles = [];
foreach ($keywords as $keyword) {
$params = http_build_query(["q" => $keyword, "gl" => "es", "hl" => "es"]);
$ch = curl_init("https://api.autom.dev/v1/google/news?{$params}");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // return the body instead of printing it
curl_setopt($ch, CURLOPT_HTTPHEADER, ["x-api-key: {$apiKey}"]); // API authentication header
$data = json_decode(curl_exec($ch), true);
curl_close($ch);
// Only links not seen in any previous run count as new.
foreach ($data["organic_results"] ?? [] as $article) {
if (!isset($seen[$article["link"]])) {
$seen[$article["link"]] = true;
$newArticles[] = array_merge($article, ["keyword" => $keyword]);
}
}
}
// Persist the deduplication state; array_keys recovers the plain URL list.
file_put_contents($seenFile, json_encode(array_keys($seen)));
echo "Encontrados " . count($newArticles) . " artículo(s) nuevo(s):\n";
foreach ($newArticles as $a) echo " [{$a['keyword']}] {$a['title']} — {$a['source']}\n";package main
import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
)

// fetchNews queries the Autom Google News endpoint for one keyword and returns
// the organic_results entries as generic maps. It returns nil/empty instead of
// panicking when the request fails or the response carries no results.
func fetchNews(apiKey, query string) []map[string]any {
	params := url.Values{"q": {query}, "gl": {"es"}, "hl": {"es"}}
	req, err := http.NewRequest("GET", "https://api.autom.dev/v1/google/news?"+params.Encode(), nil)
	if err != nil {
		return nil
	}
	req.Header.Set("x-api-key", apiKey) // API authentication header
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil // network failure: report nothing rather than dereference a nil resp
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	var data map[string]any
	json.Unmarshal(body, &data)
	// comma-ok assertion: a missing or null organic_results key must not panic.
	results, _ := data["organic_results"].([]any)
	var out []map[string]any
	for _, r := range results {
		if a, ok := r.(map[string]any); ok {
			out = append(out, a)
		}
	}
	return out
}

func main() {
	apiKey := "YOUR_API_KEY"
	keywords := []string{"OpenAI", "Anthropic", "Mistral AI"}
	// seen holds every article URL already alerted on by previous runs.
	seen := map[string]bool{}
	if b, err := os.ReadFile("seen_articles.json"); err == nil {
		var links []string
		json.Unmarshal(b, &links)
		for _, l := range links {
			seen[l] = true
		}
	}
	var newCount int
	for _, kw := range keywords {
		for _, a := range fetchNews(apiKey, kw) {
			// comma-ok: skip malformed entries instead of panicking on a bad link field.
			link, ok := a["link"].(string)
			if !ok || seen[link] {
				continue
			}
			seen[link] = true
			fmt.Printf(" [%s] %s — %s\n", kw, a["title"], a["source"])
			newCount++
		}
	}
	// Persist the updated seen set for the next scheduled run.
	links := make([]string, 0, len(seen))
	for l := range seen {
		links = append(links, l)
	}
	b, _ := json.Marshal(links)
	os.WriteFile("seen_articles.json", b, 0644)
	fmt.Printf("Encontrados %d artículo(s) nuevo(s).\n", newCount)
}import java.net.URI;
import java.net.URLEncoder;
import java.net.http.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.util.*;
import org.json.*;
// News-monitoring pipeline: fetches Google News results per keyword and alerts
// only on article URLs not recorded in seen_articles.json by earlier runs.
public class Main {
static HttpClient client = HttpClient.newHttpClient();
static String API_KEY = "YOUR_API_KEY";
public static void main(String[] args) throws Exception {
var keywords = List.of("OpenAI", "Anthropic", "Mistral AI");
var seenPath = Path.of("seen_articles.json");
var seen = new HashSet<String>();
// Load the deduplication state from previous runs, if any.
if (Files.exists(seenPath)) {
var arr = new JSONArray(Files.readString(seenPath));
for (int i = 0; i < arr.length(); i++) seen.add(arr.getString(i));
}
int newCount = 0;
for (var kw : keywords) {
// Keywords may contain spaces, so URL-encode them for the query string.
var q = URLEncoder.encode(kw, StandardCharsets.UTF_8);
var url = "https://api.autom.dev/v1/google/news?q=" + q + "&gl=es&hl=es";
var req = HttpRequest.newBuilder().uri(URI.create(url))
.header("x-api-key", API_KEY).GET().build();
var results = new JSONObject(client.send(req, HttpResponse.BodyHandlers.ofString()).body())
.getJSONArray("organic_results"); // NOTE(review): throws JSONException when the key is absent
for (int i = 0; i < results.length(); i++) {
var a = results.getJSONObject(i);
// HashSet.add returns true only for links never seen before.
if (seen.add(a.getString("link"))) {
System.out.printf(" [%s] %s — %s%n", kw, a.getString("title"), a.getString("source"));
newCount++;
}
}
}
// Persist the updated seen set for the next scheduled run.
Files.writeString(seenPath, new JSONArray(seen).toString());
System.out.println("Encontrados " + newCount + " artículo(s) nuevo(s).");
}
}
}using System.Net.Http;
using System.Text.Json;
var apiKey = "YOUR_API_KEY";
var keywords = new[] { "OpenAI", "Anthropic", "Mistral AI" };
// File that persists every article URL already alerted on between runs.
var seenFile = "seen_articles.json";
using var client = new HttpClient();
client.DefaultRequestHeaders.Add("x-api-key", apiKey); // API authentication header
// Load the deduplication state (empty set on the first run).
var seen = File.Exists(seenFile)
? JsonSerializer.Deserialize<HashSet<string>>(File.ReadAllText(seenFile))!
: new HashSet<string>();
int newCount = 0;
foreach (var keyword in keywords)
{
// Keywords may contain spaces, so escape them for the query string.
var body = await client.GetStringAsync(
$"https://api.autom.dev/v1/google/news?q={Uri.EscapeDataString(keyword)}&gl=es&hl=es");
foreach (var a in JsonDocument.Parse(body).RootElement.GetProperty("organic_results").EnumerateArray())
{
// HashSet.Add returns true only for links never seen before.
if (seen.Add(a.GetProperty("link").GetString()!))
{
Console.WriteLine($" [{keyword}] {a.GetProperty("title")} — {a.GetProperty("source")}");
newCount++;
}
}
}
// Persist the updated seen set for the next scheduled run.
File.WriteAllText(seenFile, JsonSerializer.Serialize(seen));
Console.WriteLine($"Encontrados {newCount} artículo(s) nuevo(s).");use reqwest::Client;
use serde_json::Value;
use std::{collections::HashSet, fs, path::Path};
async fn fetch_news(client: &Client, api_key: &str, query: &str) -> Vec<Value> {
let data = client.get("https://api.autom.dev/v1/google/news")
.header("x-api-key", api_key)
.query(&[("q", query), ("gl", "es"), ("hl", "es")])
.send().await.unwrap().json::<Value>().await.unwrap();
data["organic_results"].as_array().cloned().unwrap_or_default()
}
#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
let client = Client::new();
let api_key = "YOUR_API_KEY";
let keywords = ["OpenAI", "Anthropic", "Mistral AI"];
let seen_path = Path::new("seen_articles.json");
let mut seen: HashSet<String> = if seen_path.exists() {
serde_json::from_str::<Vec<String>>(&fs::read_to_string(seen_path).unwrap())
.unwrap().into_iter().collect()
} else { HashSet::new() };
let mut new_count = 0;
for keyword in &keywords {
for article in fetch_news(&client, api_key, keyword).await {
let link = article["link"].as_str().unwrap_or("").to_string();
if seen.insert(link) {
println!(" [{}] {} — {}", keyword, article["title"].as_str().unwrap_or(""), article["source"].as_str().unwrap_or(""));
new_count += 1;
}
}
}
let seen_vec: Vec<&String> = seen.iter().collect();
fs::write(seen_path, serde_json::to_string(&seen_vec).unwrap()).unwrap();
println!("Encontrados {new_count} artículo(s) nuevo(s).");
Ok(())
}Programa este script con un cron job (p. ej. cada hora) o un programador de tareas para recibir monitoreo continuo de cobertura. Combina varias palabras clave en una sola ejecución para minimizar el uso de créditos.