commit cac9701e3b263f7abede482c56e2940746eef3df
Author: uskovgs
Date:   Fri Jun 6 18:10:01 2025 +0300

    complete srgweb.triton module

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..272276e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+*.pyc
+*.pyo
+__pycache__/
+.quarto
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b38c58f
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# srgweb
\ No newline at end of file
diff --git a/_quarto.yml b/_quarto.yml
new file mode 100644
index 0000000..02023aa
--- /dev/null
+++ b/_quarto.yml
@@ -0,0 +1,10 @@
+quartodoc:
+  style: pkgdown
+  dir: reference
+  package: quartodoc
+  sections:
+    - title: Some functions
+      desc: Functions to inspect docstrings.
+      contents:
+        - get_object
+        - preview
\ No newline at end of file
diff --git a/examples.py b/examples.py
new file mode 100644
index 0000000..47bc66d
--- /dev/null
+++ b/examples.py
@@ -0,0 +1,20 @@
+from srgweb.triton import (
+    triton_session,
+    list_programs,
+    get_program,
+    list_baskets,
+    get_basket
+)
+import keyring
+
+# login to triton
+sess = triton_session("uskov", keyring.get_password("PLAN_SRG", ""))
+# list available programs
+programs = list_programs(sess)
+# download program SRGA
+df = get_program(sess, program="SRGA")
+# list available baskets
+baskets = list_baskets(sess)
+# download basket ART-XC agns
+df_basket = get_basket(sess, basket='ART-XC agns')
+
diff --git a/objects.json b/objects.json
new file mode 100644
index 0000000..06fe352
--- /dev/null
+++ b/objects.json
@@ -0,0 +1 @@
+{"project": "quartodoc", "version": "0.0.9999", "count": 4, "items": [{"name": "quartodoc.get_object", "domain": "py", "role": "function", "priority": "1", "uri": "reference/get_object.html#quartodoc.get_object", "dispname": "-"}, {"name": "quartodoc.autosummary.get_object", "domain": "py", "role": "function", "priority": "1", "uri": "reference/get_object.html#quartodoc.get_object", "dispname": "quartodoc.get_object"}, {"name": "quartodoc.preview", "domain": "py", "role": "function", "priority": "1", "uri": "reference/preview.html#quartodoc.preview", "dispname": "-"}, {"name": "quartodoc.ast.preview", "domain": "py", "role": "function", "priority": "1", "uri": "reference/preview.html#quartodoc.preview", "dispname": "quartodoc.preview"}]}
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..700e577
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,23 @@
+[project]
+name = "srgweb"
+version = "0.1.0"
+description = "python interface to internal web services"
+authors = [
+    {name = "uskovgs",email = "uskov@cosmos.ru"}
+]
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "requests",
+    "beautifulsoup4",
+    "pandas",
+    "rich",
+    "pyjanitor",
+    "astropy",
+    "lxml",  # parser backend used by pandas.read_html
+]
+
+
+[build-system]
+requires = ["poetry-core>=2.0.0,<3.0.0"]
+build-backend = "poetry.core.masonry.api"
diff --git a/reference/get_object.qmd b/reference/get_object.qmd
new file mode 100644
index 0000000..933ac83
--- /dev/null
+++ b/reference/get_object.qmd
@@ -0,0 +1,46 @@
+# get_object { #quartodoc.get_object }
+
+```python
+get_object(
+    path,
+    object_name=None,
+    parser='numpy',
+    load_aliases=True,
+    dynamic=False,
+    loader=None,
+)
+```
+
+Fetch a griffe object.
+
+## Parameters {.doc-section .doc-section-parameters}
+
+| Name | Type | Description | Default |
+|--------------|-----------------|--------------------------------------------------------------------------------------------------------------------------------------------------------|------------|
+| path | str | An import path to the object. This should have the form `path.to.module:object`. For example, `quartodoc:get_object` or `quartodoc:MdRenderer.render`. | _required_ |
+| object_name | \'str \| None\' | (Deprecated). A function name. | `None` |
+| parser | str | A docstring parser to use. | `'numpy'` |
+| load_aliases | | For aliases that were imported from other modules, should we load that module? | `True` |
+| dynamic | | Whether to dynamically import object. Useful if docstring is not hard-coded, but was set on object by running python code. | `False` |
+
+## See Also {.doc-section .doc-section-see-also}
+
+preview: print a user-friendly preview of a griffe object.
+
+## Examples {.doc-section .doc-section-examples}
+
+```python
+>>> get_function("quartodoc", "get_function")
+>>> from quartodoc import get_object
+>>> obj = get_object("quartodoc", "get_object")
+```
+
+```python
+>>> preview(obj.docstring.parsed)
+...
+```
+
+```python
+>>> preview(obj)
+...
+```
\ No newline at end of file
diff --git a/src/srgweb/__init__.py b/src/srgweb/__init__.py
new file mode 100644
index 0000000..c9c2ef6
--- /dev/null
+++ b/src/srgweb/__init__.py
@@ -0,0 +1 @@
+__all__: list[str] = []
diff --git a/src/srgweb/artsurvey.py b/src/srgweb/artsurvey.py
new file mode 100644
index 0000000..2b23966
--- /dev/null
+++ b/src/srgweb/artsurvey.py
@@ -0,0 +1,237 @@
+import io
+
+import requests
+from bs4 import BeautifulSoup
+import pandas as pd
+from rich.console import Console
+from rich.progress import Progress, SpinnerColumn, TextColumn
+from astropy.coordinates import SkyCoord
+import astropy.units as u
+
+console = Console()
+
+def artsurvey_session(
+    username: str = "",
+    password: str = "",
+    base_url: str = "http://arxiv.srg.rssi.ru/"
+) -> requests.Session | None:
+    """
+    Open ART-Survey session
+
+    Parameters
+    ----------
+    username : str, optional
+        Your login username for ARTSurvey. Default is "".
+    password : str, optional
+        Your login password. Default is "".
+    base_url : str, optional
+        Base URL of the ARTSurvey system. Default is "http://arxiv.srg.rssi.ru/".
+
+    Returns
+    -------
+    requests.Session or None
+        Authenticated requests.Session object if login successful, otherwise None.
+
+    Example
+    -------
+    >>> sess = artsurvey_session("bob", keyring.get_password("PLAN_SRG", ""))
+    """
+    LOGIN_URL = f"{base_url.rstrip('/')}/login"
+    HEADERS = {
+        "User-Agent": "srgweb, Python package (uskov@cosmos.ru)"
+    }
+
+    with console.status(
+        f"Logging in as [cyan]{username}[/cyan] to [link={base_url.rstrip('/')}][blue underline]<{base_url.rstrip('/')}>"
+    ):
+        session = requests.Session()
+        session.headers.update(HEADERS)
+
+        # Step 1 — GET login page
+        resp = session.get(LOGIN_URL)
+        resp.raise_for_status()
+        soup = BeautifulSoup(resp.text, "html.parser")
+
+        form = soup.find("form")
+        if not form:
+            console.print("[red]Login form not found.[/red]")
+            return None
+
+        # Step 2 — Prepare form fields
+        action = form.get("action") or LOGIN_URL
+        full_action = action if action.startswith("http") else requests.compat.urljoin(LOGIN_URL, action)
+
+        payload = {}
+        for input_ in form.find_all("input"):
+            name = input_.get("name")
+            if not name:
+                continue
+            if name == "username":
+                payload[name] = username
+            elif name == "password":
+                payload[name] = password
+            else:
+                payload[name] = input_.get("value", "")
+
+        # Step 3 — POST login form
+        resp = session.post(full_action, data=payload, headers={"Referer": LOGIN_URL})
+        resp.raise_for_status()
+
+        soup = BeautifulSoup(resp.text, "html.parser")
+        text = soup.get_text(strip=True)
+
+        if "login: form is not valid" in text.lower():
+            console.print("[bold red]Incorrect login or password.[/bold red]")
+            return None
+
+    console.print(
+        f"[green]✔[/green] Logged in as [cyan]{username}[/cyan] to [link={base_url.rstrip('/')}][blue underline]<{base_url.rstrip('/')}>"
+    )
+    return session
+
+def default_artsurvey_settings() -> dict[str, str]:
+    """
+    Return a dictionary of default parameters for the ARTSurvey filters.
+
+    Returns
+    -------
+    dict[str, str]
+        Filter parameters for the ARTSurvey form.
+    """
+    return {
+        "sky": "allsky",
+        "survey": "179",
+        "band": "E0",
+        "exclude_survey": "",
+        "exclude_band": "E0",
+        "exclude_log_nfalse": "",
+        "exclude_log_ml_nfalse": "",
+        "sign_ml_min": "",
+        "sign_ml_max": "",
+        "log_nfalse_min": "",
+        "log_nfalse_max": "",
+        "log_ml_nfalse_min": "",
+        "log_ml_nfalse_max": "",
+        "detlike_min": "",
+        "detlike_max": "",
+        "exposure_min": "",
+        "ext_min": "",
+        "ext_max": "",
+        "class_startswith": "",
+        "cname_contains": "",
+        "category": "",
+        "exclude_category": "",
+        # "category_unclassified": "",
+        # "gaia_primary": "",
+        # "allwise_primary": "",
+        # "turk_possible": "",
+        # "dec_min": "",
+        # "dec_max": "",
+        # "ecl_lat_min": "",
+        # "ecl_lat_max": "",
+        # "circle_ra": "",
+        # "circle_dec": "",
+        # "circle_rmax_deg": ""
+    }
+
+def get_artsurvey_cat(
+    session: requests.Session,
+    survey_name: str = "S1-5v12345.12",
+    *,
+    base_url: str = "http://arxiv.srg.rssi.ru/",
+    **kwargs,
+) -> pd.DataFrame:
+    """
+    Download the ART-Survey source catalogue as a DataFrame.
+
+    The filter form on the ``artsurvey`` page is re-submitted with the chosen
+    survey selected, then the catalogue is fetched as CSV.
+
+    Parameters
+    ----------
+    session : requests.Session
+        An authenticated session, e.g. from ``artsurvey_session``.
+    survey_name : str, optional
+        Visible text of the survey option to select in the filter form.
+        Default is "S1-5v12345.12".
+    base_url : str, optional
+        Base URL of the ARTSurvey system; keyword-only. Default is
+        "http://arxiv.srg.rssi.ru/".
+    **kwargs
+        Extra form fields; they override the values collected from the page.
+
+    Returns
+    -------
+    pd.DataFrame
+        The catalogue. Empty if the filter form is missing or the CSV has no
+        rows. A 'skycoord' column is added when 'ra' and 'dec' are present.
+
+    Raises
+    ------
+    requests.HTTPError
+        If any of the requests returns an HTTP error status.
+    """
+    # 1. Fetch the filter (settings) form
+    with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}")) as progress:
+        progress.add_task(description="Обновление фильтров обзора", total=None)
+        settings_url = f"{base_url.rstrip('/')}/artsurvey"
+        resp = session.get(settings_url)
+        resp.raise_for_status()
+        soup = BeautifulSoup(resp.text, "html.parser")
+
+        form = soup.find("form")
+        if not form:
+            console.print("[red]❌ Не найдена форма фильтров на странице artsurvey.[/red]")
+            return pd.DataFrame()
+
+        # 2. Collect the form parameters
+        payload = {}
+
+        # 2.1 input fields: echo back whatever value the page pre-filled
+        for input_tag in form.find_all("input"):
+            name = input_tag.get("name")
+            if not name:
+                continue
+            payload[name] = input_tag.get("value", "")
+
+        # 2.2 select fields: pick the option whose text equals survey_name
+        for select_tag in form.find_all("select"):
+            name = select_tag.get("name")
+            if not name:
+                continue
+            options = select_tag.find_all("option")
+            selected = None
+            for option in options:
+                if option.text.strip() == survey_name:
+                    selected = option.get("value")
+                    break
+            if selected:
+                payload[name] = selected
+
+        # 2.3 user-supplied arguments (take priority over page values)
+        payload.update(kwargs)
+
+        # 3. Submit the filters
+        action = form.get("action") or settings_url
+        full_action = action if action.startswith("http") else requests.compat.urljoin(settings_url, action)
+        resp = session.post(full_action, data=payload, headers={"Referer": settings_url})
+        resp.raise_for_status()
+
+    # 4. Download the CSV
+    with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}")) as progress:
+        progress.add_task(description="Загрузка каталога ART-Survey (~10-20 секунд)", total=None)
+        csv_url = f"{base_url.rstrip('/')}/artsurvey/csv/all"
+        resp = session.get(csv_url)
+        resp.raise_for_status()
+        df = pd.read_csv(io.StringIO(resp.text), na_values=["", "None"])
+
+    # 5. Post-processing
+    if df.empty:
+        console.print("[red]Каталог пуст. Возможно, фильтры слишком строгие.[/red]")
+        return df
+
+    if "ra" in df.columns and "dec" in df.columns:
+        coords = SkyCoord(ra=df["ra"].values * u.deg, dec=df["dec"].values * u.deg, frame="icrs")
+        df["skycoord"] = coords
+
+    return df
diff --git a/src/srgweb/triton.py b/src/srgweb/triton.py
new file mode 100644
index 0000000..fd7fde4
--- /dev/null
+++ b/src/srgweb/triton.py
@@ -0,0 +1,379 @@
+import requests
+import io
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin
+
+from rich.console import Console
+import pandas as pd
+from janitor import clean_names
+
+
+
+console = Console()
+
+
+def triton_session(username: str = "", password: str = "") -> requests.Session | None:
+    """
+    Open triton session
+
+    Parameters
+    ----------
+    username : str, optional
+        Your Triton login username. Default is an empty string.
+    password : str, optional
+        Your Triton login password. Default is an empty string.
+
+    Returns
+    -------
+    requests.Session or None
+        An authenticated requests.Session object if login is successful, otherwise None.
+
+    Example
+    -------
+    >>> sess = triton_session("bob", keyring.get_password("PLAN_SRG", ""))
+    """
+    LOGIN_URL = "https://www.srg.cosmos.ru/logbook/login"
+    HEADERS = {
+        "User-Agent": "srgweb, Python package (uskov@cosmos.ru)"
+    }
+    with console.status(f"Logging in as [cyan]{username}[/cyan] to [link=https://www.srg.cosmos.ru/triton][blue underline]<https://www.srg.cosmos.ru/triton>"):
+        session = requests.Session()
+        session.headers.update(HEADERS)
+
+        resp = session.get(LOGIN_URL)
+        resp.raise_for_status()
+        soup = BeautifulSoup(resp.text, "html.parser")
+
+        # 2. Extract the first form
+        form = soup.find("form")
+        if not form:
+            console.log("[red]Login form not found.")
+            return None
+
+        action = form.get("action") or LOGIN_URL
+        full_action = action if action.startswith("http") else requests.compat.urljoin(LOGIN_URL, action)
+
+        # 3. Prepare payload from form inputs
+        payload = {}
+        for input_ in form.find_all("input"):
+            name = input_.get("name")
+            if not name:
+                continue
+            if name == "username":
+                payload[name] = username
+            elif name == "password":
+                payload[name] = password
+            else:
+                payload[name] = input_.get("value", "")
+
+
+        resp = session.post(full_action, data=payload, headers={"Referer": LOGIN_URL})
+        resp.raise_for_status()
+
+
+        soup = BeautifulSoup(resp.text, "html.parser")
+        text = soup.get_text(strip=True)
+        if "login: form is not valid" in text.lower():
+            console.print("[bold red]Incorrect login or password.")
+            return None
+
+    console.print(f"[green]✔[/green] Logged in as [cyan]{username}[/cyan] to [link=https://www.srg.cosmos.ru/triton][blue underline]<https://www.srg.cosmos.ru/triton>")
+    return session
+
+
+
+
+def list_programs(session: requests.Session) -> dict[str, str]:
+    """
+    Get the list of the Programs available in Triton.
+
+    Parameters
+    ----------
+    session : requests.Session
+        An authenticated requests.Session object.
+
+    Returns
+    -------
+    dict[str, str]
+        A dictionary mapping program names to their URLs.
+
+    Example
+    -------
+    >>> sess = triton_session("bob", keyring.get_password("PLAN_SRG", ""))
+    >>> programs = list_programs(sess)
+    >>> print(programs)
+    """
+    MAIN_PAGE = "https://www.srg.cosmos.ru/triton/en"
+    BASE_URL = "https://www.srg.cosmos.ru"
+
+    resp = session.get(MAIN_PAGE)
+    resp.raise_for_status()
+
+    soup = BeautifulSoup(resp.text, "html.parser")
+    dropdown = soup.select_one(".dropdown-menu")
+    if dropdown is None:
+        raise RuntimeError("Dropdown menu not found on Triton main page")
+
+    links = dropdown.select("a")
+
+    items: dict[str, str] = {}
+    for a in links:
+        name = a.get_text(strip=True)
+        href = a.get("href") or ""
+        full_url = urljoin(BASE_URL, href)
+        if name:
+            items[name] = full_url
+
+    items["all"] = "https://www.srg.cosmos.ru/triton/show/all"
+
+
+    return items
+
+
+
+
+
+def list_baskets(session: requests.Session) -> dict[str, str]:
+    """
+    Get the list of baskets available in Triton.
+
+    Parameters
+    ----------
+    session : requests.Session
+        An authenticated requests.Session object.
+
+    Returns
+    -------
+    dict[str, str]
+        A dictionary mapping basket titles to their URLs (sorted by title).
+    """
+    MAIN_PAGE = "https://www.srg.cosmos.ru/triton/en"
+    resp = session.get(MAIN_PAGE)
+    resp.raise_for_status()
+    soup = BeautifulSoup(resp.text, "html.parser")
+
+    tables = soup.select(".table")
+    if len(tables) < 2:
+        raise RuntimeError("Basket table not found on Triton main page")
+
+    # Parse the second table as baskets
+    df_basket = pd.read_html(io.StringIO(str(tables[1])))[0]
+    df_basket = clean_names(df_basket)
+
+    # Extract basket URLs from the anchors of the second table
+    basket_urls = []
+    for a in tables[1].select("a"):
+        href = a.get("href", "")
+        if "show" in href:
+            basket_urls.append(f"https://www.srg.cosmos.ru{href}")
+
+    # Add URLs to DataFrame. On a count mismatch, truncate surplus URLs and pad
+    # missing ones with "" so the column always matches the number of rows
+    # (a longer list would make the assignment raise ValueError).
+    n_rows = len(df_basket)
+    basket_urls = basket_urls[:n_rows]
+    df_basket["url"] = basket_urls + [""] * (n_rows - len(basket_urls))
+
+    # Map title to url, sort by title; str() keeps a missing (NaN) title from
+    # breaking the case-insensitive sort.
+    pairs = ((str(title), url) for title, url in zip(df_basket["title"], df_basket["url"]))
+    vals = dict(sorted(pairs, key=lambda x: x[0].lower()))
+    return vals
+
+
+def _triton_read_table(session: requests.Session, page_download: str) -> pd.DataFrame:
+    """
+    Download a Triton CSV export and load it into a DataFrame.
+
+    Parameters
+    ----------
+    session : requests.Session
+        An authenticated requests.Session object.
+    page_download : str
+        URL of the CSV download page.
+
+    Returns
+    -------
+    pd.DataFrame
+        The parsed table with column names normalised by ``clean_names``.
+    """
+    resp = session.get(page_download)
+    resp.raise_for_status()
+
+    csv_bytes: bytes = resp.content
+    df = pd.read_csv(
+        io.BytesIO(csv_bytes),
+        dtype={
+            "RA": "float64",
+            "Dec": "float64",
+            "objid": "int64",
+            "time_spent": "string",
+            "Redshift_str": "string",
+            "Redshift_err": "string",
+            "Status": "string",
+        },
+        na_values=["", "None"],
+        keep_default_na=True,
+    )
+
+    df = clean_names(df)
+    return df
+
+
+def _triton_parse_src_urls(session: requests.Session, program_url: str) -> dict[str, str]:
+    """
+    Returns a dictionary mapping source names to their URLs for each row in the program table.
+    Only `a` elements without a class and with 'show' in href are considered, matching the R logic.
+
+    Parameters
+    ----------
+    session : requests.Session
+        An authenticated requests.Session object.
+    program_url : str
+        The URL of the program page to parse.
+
+    Returns
+    -------
+    dict[str, str]
+        A dictionary where keys are source names and values are their corresponding URLs.
+    """
+    resp = session.get(program_url)
+    resp.raise_for_status()
+    soup = BeautifulSoup(resp.text, "html.parser")
+
+    # Find all `a` inside `td` inside .col-sm-12, no class, href contains 'show'
+    anchors = soup.select(".col-sm-12 td a:not([class])")
+    name_url: dict[str, str] = {}
+    for a in anchors:
+        href = a.get("href", "")
+        if "show" in href:
+            name = a.get_text(strip=True)
+            full_url = requests.compat.urljoin("https://www.srg.cosmos.ru", href)
+            name_url[name] = full_url
+    return name_url
+
+
+def get_program(session: requests.Session, program: str = "SRGA") -> pd.DataFrame:
+    """
+    Download and parse a program table from the Triton system.
+
+    Parameters
+    ----------
+    session : requests.Session
+        An authenticated requests.Session object.
+    program : str, optional
+        The name of the program to download (case-insensitive). Default is "SRGA".
+
+    Returns
+    -------
+    pd.DataFrame
+        A DataFrame containing the program table with cleaned column names.
+        Columns include at least 'name', 'type', 'url', and, if astropy is installed, 'skycoord'.
+
+    Raises
+    ------
+    ValueError
+        If the specified program is not found among available programs.
+
+    Example
+    -------
+    >>> sess = triton_session("bob", keyring.get_password("PLAN_SRG", ""))
+    >>> df = get_program(sess, "SRGA")
+    >>> print(df.head())
+    """
+
+    with console.status("Check available programs", spinner="dots"):
+        programs = list_programs(session)
+        programs_lc = {k.lower(): v for k, v in programs.items()}
+    console.print("[green]✔[/green] Check available programs")
+
+    prog_key = program.lower()
+    if prog_key not in programs_lc:
+        raise ValueError(f"Program {program} not found. Choose one of: {list(programs.keys())}")
+
+
+    with console.status(f"Parse URLs from the [green]{program}[/green] program", spinner="dots"):
+        src_vec = _triton_parse_src_urls(session, programs_lc[prog_key])
+    console.print(f"[green]✔[/green] Parse URLs from the [green]{program}[/green] program")
+
+
+    # Only the selected program's download page is needed: program pages map
+    # "show" -> "download"; everything else uses the generic download URL.
+    program_url = programs_lc[prog_key]
+    if "program" in program_url:
+        download_url = program_url.replace("show", "download")
+    else:
+        download_url = "https://www.srg.cosmos.ru/triton/download"
+
+    with console.status(f"Downloading table from [green]{program}[/green]", spinner="dots"):
+        df = _triton_read_table(session, download_url)
+    console.print(f"[green]✔[/green] Downloading table from [green]{program}[/green]")
+
+    df["type"] = df["observernotes"].str.extract(r"type:\s?(.*)\n?", expand=False)
+    df["url"] = df["name"].map(src_vec).astype("string")
+
+
+    try:
+        from astropy.coordinates import SkyCoord
+        df["skycoord"] = SkyCoord(df['ra'], df['dec'], frame='icrs', unit='deg')
+    except ImportError:
+        pass
+
+    console.print("[green]✔[/green] Finished")
+    return df
+
+
+def get_basket(session: requests.Session, basket: str = "") -> pd.DataFrame:
+    """
+    Download and parse a basket table from the Triton system.
+
+    Parameters
+    ----------
+    session : requests.Session
+        An authenticated requests.Session object.
+    basket : str, optional
+        The name of the basket to download (case-insensitive).
+
+    Returns
+    -------
+    pd.DataFrame
+        A DataFrame containing the basket table with cleaned column names and a 'skycoord' column if astropy is installed.
+
+    Raises
+    ------
+    ValueError
+        If the specified basket is not found among available baskets.
+    """
+    with console.status("Check available baskets", spinner="dots"):
+        baskets = list_baskets(session)
+        baskets_lc = {k.lower(): v for k, v in baskets.items()}
+    console.print("[green]✔[/green] Check available baskets")
+
+    basket_key = basket.lower()
+    if basket_key not in baskets_lc:
+        raise ValueError(f"Basket {basket} not found. Choose one of: {list(baskets.keys())}")
+
+    with console.status(f"Parse basket table from [green]{basket}[/green]", spinner="dots"):
+        url = baskets_lc[basket_key]
+        resp = session.get(url)
+        resp.raise_for_status()
+        soup = BeautifulSoup(resp.text, "html.parser")
+        tables = soup.select(".table")
+        if not tables:
+            raise RuntimeError("No tables found on basket page")
+        df_basket = pd.read_html(io.StringIO(str(tables[0])))[0]
+        df_basket = clean_names(df_basket)
+    console.print(f"[green]✔[/green] Parse basket table from [green]{basket}[/green]")
+
+    try:
+        from astropy.coordinates import SkyCoord
+        df_basket['skycoord'] = df_basket.apply(
+            lambda r: SkyCoord(f"{r['rahms']} {r['decdms']}", unit=("hourangle", "deg"), frame="icrs"),
+            axis=1
+        )
+    except ImportError:
+        pass
+
+    console.print("[green]✔[/green] Finished")
+    return df_basket
+
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29