complete srgweb.triton module

uskovgs 2025-06-06 18:10:01 +03:00
commit cac9701e3b
13 changed files with 690 additions and 0 deletions

4
.gitignore vendored Normal file

@@ -0,0 +1,4 @@
*.pyc
*.pyo
__pycache__/
.quarto

1
README.md Normal file

@@ -0,0 +1 @@
# srgweb
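
Python interface to internal SRG web services (Triton, ART-Survey). A minimal usage sketch, mirroring `examples.py` from this repository (assumes credentials are stored with `keyring` under the `PLAN_SRG` service name):

```python
import keyring
from srgweb.triton import triton_session, get_program

# log in to Triton and download the SRGA program table
sess = triton_session("uskov", keyring.get_password("PLAN_SRG", ""))
df = get_program(sess, program="SRGA")
```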

10
_quarto.yml Normal file

@@ -0,0 +1,10 @@
quartodoc:
  style: pkgdown
  dir: reference
  package: quartodoc
  sections:
    - title: Some functions
      desc: Functions to inspect docstrings.
      contents:
        - get_object
        - preview

20
examples.py Normal file

@@ -0,0 +1,20 @@
from srgweb.triton import (
    triton_session,
    list_programs,
    get_program,
    list_baskets,
    get_basket
)
import keyring

# login to triton
sess = triton_session("uskov", keyring.get_password("PLAN_SRG", ""))

# list available programs
programs = list_programs(sess)

# download program SRGA
df = get_program(sess, program="SRGA")

# list available baskets
baskets = list_baskets(sess)

# download basket ART-XC agns
df_basket = get_basket(sess, basket='ART-XC agns')
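
# The artsurvey module follows the same session/download pattern. Hedged
# sketch only -- the function names and default survey ID below are taken
# from src/srgweb/artsurvey.py:
# from srgweb.artsurvey import artsurvey_session, get_artsurvey_cat
# sess_art = artsurvey_session("uskov", keyring.get_password("PLAN_SRG", ""))
# df_art = get_artsurvey_cat(sess_art, survey_name="S1-5v12345.12")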

1
objects.json Normal file

@@ -0,0 +1 @@
{"project": "quartodoc", "version": "0.0.9999", "count": 4, "items": [{"name": "quartodoc.get_object", "domain": "py", "role": "function", "priority": "1", "uri": "reference/get_object.html#quartodoc.get_object", "dispname": "-"}, {"name": "quartodoc.autosummary.get_object", "domain": "py", "role": "function", "priority": "1", "uri": "reference/get_object.html#quartodoc.get_object", "dispname": "quartodoc.get_object"}, {"name": "quartodoc.preview", "domain": "py", "role": "function", "priority": "1", "uri": "reference/preview.html#quartodoc.preview", "dispname": "-"}, {"name": "quartodoc.ast.preview", "domain": "py", "role": "function", "priority": "1", "uri": "reference/preview.html#quartodoc.preview", "dispname": "quartodoc.preview"}]}

16
pyproject.toml Normal file

@@ -0,0 +1,16 @@
[project]
name = "srgweb"
version = "0.1.0"
description = "python interface to internal web services"
authors = [
    {name = "uskovgs", email = "uskov@cosmos.ru"}
]
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
]

[build-system]
requires = ["poetry-core>=2.0.0,<3.0.0"]
build-backend = "poetry.core.masonry.api"

41
reference/get_object.qmd Normal file

@@ -0,0 +1,41 @@
# get_object { #quartodoc.get_object }
```python
get_object(
path,
object_name=None,
parser='numpy',
load_aliases=True,
dynamic=False,
loader=None,
)
```
Fetch a griffe object.
## Parameters {.doc-section .doc-section-parameters}
| Name | Type | Description | Default |
|--------------|-----------------|--------------------------------------------------------------------------------------------------------------------------------------------------------|------------|
| path | str | An import path to the object. This should have the form `path.to.module:object`. For example, `quartodoc:get_object` or `quartodoc:MdRenderer.render`. | _required_ |
| object_name | \'str \| None\' | (Deprecated). A function name. | `None` |
| parser | str | A docstring parser to use. | `'numpy'` |
| load_aliases | | For aliases that were imported from other modules, should we load that module? | `True` |
| dynamic | | Whether to dynamically import object. Useful if docstring is not hard-coded, but was set on object by running python code. | `False` |
## See Also {.doc-section .doc-section-see-also}
preview: print a user-friendly preview of a griffe object.
## Examples {.doc-section .doc-section-examples}
```python
>>> get_function("quartodoc", "get_function")
<Function('get_function', ...
```
## Returns {.doc-section .doc-section-returns}
| Name | Type | Description |
|--------|-----------|---------------|
| x | dc.Object | abc |

10
reference/index.qmd Normal file

@@ -0,0 +1,10 @@
# Function reference {.doc .doc-index}
## Some functions
Functions to inspect docstrings.
| | |
| --- | --- |
| [get_object](get_object.qmd#quartodoc.get_object) | Fetch a griffe object. |
| [preview](preview.qmd#quartodoc.preview) | Print a friendly representation of a griffe object (e.g. function, docstring) |

24
reference/preview.qmd Normal file

@@ -0,0 +1,24 @@
# preview { #quartodoc.preview }
```python
preview(ast, max_depth=999, compact=False, as_string=False)
```
Print a friendly representation of a griffe object (e.g. function, docstring)
## Examples {.doc-section .doc-section-examples}
```python
>>> from quartodoc import get_object
>>> obj = get_object("quartodoc", "get_object")
```
```python
>>> preview(obj.docstring.parsed)
...
```
```python
>>> preview(obj)
...
```

1
src/srgweb/__init__.py Normal file

@@ -0,0 +1 @@
__all__: list[str] = []

200
src/srgweb/artsurvey.py Normal file

@@ -0,0 +1,200 @@
import io

import requests
from bs4 import BeautifulSoup
import pandas as pd
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn
from astropy.coordinates import SkyCoord
import astropy.units as u

console = Console()
def artsurvey_session(
    username: str = "",
    password: str = "",
    base_url: str = "http://arxiv.srg.rssi.ru/"
) -> requests.Session | None:
    """
    Open ART-Survey session

    Parameters
    ----------
    username : str, optional
        Your login username for ARTSurvey. Default is "".
    password : str, optional
        Your login password. Default is "".
    base_url : str, optional
        Base URL of the ARTSurvey system. Default is "http://arxiv.srg.rssi.ru/".

    Returns
    -------
    requests.Session or None
        Authenticated requests.Session object if login successful, otherwise None.

    Example
    -------
    >>> sess = artsurvey_session("bob", keyring.get_password("PLAN_SRG", ""))
    """
    LOGIN_URL = f"{base_url.rstrip('/')}/login"
    HEADERS = {
        "User-Agent": "srgweb, Python package (uskov@cosmos.ru)"
    }
    with console.status(
        f"Logging in as [cyan]{username}[/cyan] to [link={base_url.rstrip('/')}][blue underline]<{base_url.rstrip('/')}>"
    ):
        session = requests.Session()
        session.headers.update(HEADERS)
        # Step 1 — GET login page
        resp = session.get(LOGIN_URL)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")
        form = soup.find("form")
        if not form:
            console.print("[red]Login form not found.[/red]")
            return None
        # Step 2 — Prepare form fields
        action = form.get("action") or LOGIN_URL
        full_action = action if action.startswith("http") else requests.compat.urljoin(LOGIN_URL, action)
        payload = {}
        for input_ in form.find_all("input"):
            name = input_.get("name")
            if not name:
                continue
            if name == "username":
                payload[name] = username
            elif name == "password":
                payload[name] = password
            else:
                payload[name] = input_.get("value", "")
        # Step 3 — POST login form
        resp = session.post(full_action, data=payload, headers={"Referer": LOGIN_URL})
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")
        text = soup.get_text(strip=True)
        if "login: form is not valid" in text.lower():
            console.print("[bold red]Incorrect login or password.[/bold red]")
            return None
    console.print(
        f"[green]✔[/green] Logged in as [cyan]{username}[/cyan] to [link={base_url.rstrip('/')}][blue underline]<{base_url.rstrip('/')}>"
    )
    return session

def default_artsurvey_settings() -> dict[str, str]:
    """
    Return the default filter settings for the ARTSurvey form.

    Returns
    -------
    dict[str, str]
        Filter parameters for the ARTSurvey form.
    """
    return {
        "sky": "allsky",
        "survey": "179",
        "band": "E0",
        "exclude_survey": "",
        "exclude_band": "E0",
        "exclude_log_nfalse": "",
        "exclude_log_ml_nfalse": "",
        "sign_ml_min": "",
        "sign_ml_max": "",
        "log_nfalse_min": "",
        "log_nfalse_max": "",
        "log_ml_nfalse_min": "",
        "log_ml_nfalse_max": "",
        "detlike_min": "",
        "detlike_max": "",
        "exposure_min": "",
        "ext_min": "",
        "ext_max": "",
        "class_startswith": "",
        "cname_contains": "",
        "category": "",
        "exclude_category": "",
        # "category_unclassified": "",
        # "gaia_primary": "",
        # "allwise_primary": "",
        # "turk_possible": "",
        # "dec_min": "",
        # "dec_max": "",
        # "ecl_lat_min": "",
        # "ecl_lat_max": "",
        # "circle_ra": "",
        # "circle_dec": "",
        # "circle_rmax_deg": ""
    }
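
# Usage sketch (assumption: these defaults are meant to be passed as overrides
# to get_artsurvey_cat(); nothing in this module calls them directly yet):
#   settings = default_artsurvey_settings()
#   settings["detlike_min"] = "10"
#   df = get_artsurvey_cat(sess, survey_name="S1-5v12345.12", **settings)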

def get_artsurvey_cat(
    session: requests.Session,
    survey_name: str = "S1-5v12345.12",
    base_url: str = "http://arxiv.srg.rssi.ru/",
    **kwargs,
) -> pd.DataFrame:
    """
    Download the ART-Survey catalogue for the selected survey.

    Parameters
    ----------
    session : requests.Session
        An authenticated requests.Session object (see artsurvey_session).
    survey_name : str, optional
        Survey option to select in the filter form. Default is "S1-5v12345.12".
    base_url : str, optional
        Base URL of the ARTSurvey system; must match the one used in
        artsurvey_session. Default is "http://arxiv.srg.rssi.ru/".
    **kwargs
        Extra filter fields that override the values taken from the form.

    Returns
    -------
    pd.DataFrame
        The catalogue table; empty if the filter form is missing.
    """
    # 2. Fetch the settings form
    with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}")) as progress:
        progress.add_task(description="Updating survey filters", total=None)
        settings_url = f"{base_url.rstrip('/')}/artsurvey"
        resp = session.get(settings_url)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")
        form = soup.find("form")
        if not form:
            console.print("[red]❌ Filter form not found on the artsurvey page.[/red]")
            return pd.DataFrame()
        # 3. Collect the form parameters
        payload = {}
        # 3.1 input fields
        for input_tag in form.find_all("input"):
            name = input_tag.get("name")
            if not name:
                continue
            payload[name] = input_tag.get("value", "")
        # 3.2 select fields
        for select_tag in form.find_all("select"):
            name = select_tag.get("name")
            if not name:
                continue
            options = select_tag.find_all("option")
            selected = None
            for option in options:
                if option.text.strip() == survey_name:
                    selected = option.get("value")
                    break
            if selected:
                payload[name] = selected
        # 3.3 user arguments take priority
        for k, v in kwargs.items():
            payload[k] = v
        # 4. Submit the filters
        action = form.get("action") or settings_url
        full_action = action if action.startswith("http") else requests.compat.urljoin(settings_url, action)
        resp = session.post(full_action, data=payload, headers={"Referer": settings_url})
        resp.raise_for_status()
    # 5. Download the CSV
    with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}")) as progress:
        progress.add_task(description="Downloading the ART-Survey catalogue (~10-20 seconds)", total=None)
        csv_url = f"{base_url.rstrip('/')}/artsurvey/csv/all"
        resp = session.get(csv_url)
        resp.raise_for_status()
        df = pd.read_csv(io.StringIO(resp.text), na_values=["", "None"])
    # 6. Post-processing
    if df.empty:
        console.print("[red]The catalogue is empty; the filters may be too strict.[/red]")
        return df
    if "ra" in df.columns and "dec" in df.columns:
        coords = SkyCoord(ra=df["ra"].values * u.deg, dec=df["dec"].values * u.deg, frame="icrs")
        df["skycoord"] = coords
    return df

362
src/srgweb/triton.py Normal file

@@ -0,0 +1,362 @@
import requests
import io
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from rich.console import Console
import pandas as pd
from janitor import clean_names
console = Console()
def triton_session(username: str = "", password: str = "") -> requests.Session | None:
"""
Open triton session
Parameters
----------
username : str, optional
Your Triton login username. Default is an empty string.
password : str, optional
Your Triton login password. Default is an empty string.
Returns
-------
requests.Session or None
An authenticated requests.Session object if login is successful, otherwise None.
Example
-------
>>> sess = triton_session("bob", keyring.get_password("PLAN_SRG", ""))
"""
LOGIN_URL = "https://www.srg.cosmos.ru/logbook/login"
HEADERS = {
"User-Agent": "srgweb, Python package (uskov@cosmos.ru)"
}
with console.status(f"Logging in as [cyan]{username}[/cyan] to [link=https://www.srg.cosmos.ru/triton][blue underline]<https://www.srg.cosmos.ru/triton>"):
session = requests.Session()
session.headers.update(HEADERS)
resp = session.get(LOGIN_URL)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")
# 2. Extract the first form
form = soup.find("form")
if not form:
console.log("[red]Login form not found.")
return None
action = form.get("action") or LOGIN_URL
full_action = action if action.startswith("http") else requests.compat.urljoin(LOGIN_URL, action)
# 3. Prepare payload from form inputs
payload = {}
for input_ in form.find_all("input"):
name = input_.get("name")
if not name:
continue
if name == "username":
payload[name] = username
elif name == "password":
payload[name] = password
else:
payload[name] = input_.get("value", "")
resp = session.post(full_action, data=payload, headers={"Referer": LOGIN_URL})
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")
text = soup.get_text(strip=True)
if "login: form is not valid" in text.lower():
console.print("[bold red]Incorrect login or password.")
return None
console.print(f"[green]✔[/green] Logged in as [cyan]{username}[/cyan] to [link=https://www.srg.cosmos.ru/triton][blue underline]<https://www.srg.cosmos.ru/triton>")
return session
def list_programs(session: requests.Session) -> dict[str, str]:
"""
Get the list of the Programs available in Triton.
Parameters
----------
session : requests.Session
An authenticated requests.Session object.
Returns
-------
dict[str, str]
A dictionary mapping program names to their URLs.
Example
-------
>>> sess = triton_session("bob", keyring.get_password("PLAN_SRG", ""))
>>> programs = list_programs(sess)
>>> print(programs)
"""
MAIN_PAGE = "https://www.srg.cosmos.ru/triton/en"
BASE_URL = "https://www.srg.cosmos.ru"
resp = session.get(MAIN_PAGE)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")
dropdown = soup.select_one(".dropdown-menu")
if dropdown is None:
raise RuntimeError("Dropdown menu not found on Triton main page")
links = dropdown.select("a")
items: dict[str, str] = {}
for a in links:
name = a.get_text(strip=True)
href = a.get("href") or ""
full_url = urljoin(BASE_URL, href)
if name:
items[name] = full_url
items["all"] = "https://www.srg.cosmos.ru/triton/show/all"
return items
def list_baskets(session: requests.Session) -> dict[str, str]:
"""
Get the list of baskets available in Triton.
Parameters
----------
session : requests.Session
An authenticated requests.Session object.
Returns
-------
dict[str, str]
A dictionary mapping basket titles to their URLs (sorted by title).
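Example
-------
>>> sess = triton_session("bob", keyring.get_password("PLAN_SRG", ""))
>>> baskets = list_baskets(sess)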
"""
MAIN_PAGE = "https://www.srg.cosmos.ru/triton/en"
resp = session.get(MAIN_PAGE)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")
tables = soup.select(".table")
if len(tables) < 2:
raise RuntimeError("Basket table not found on Triton main page")
# Parse the second table as baskets
df_basket = pd.read_html(io.StringIO(str(tables[1])))[0]
df_basket = clean_names(df_basket)
# Extract basket URLs from <a> tags in the second table
basket_urls = []
for a in tables[1].select("a"):
href = a.get("href", "")
if "show" in href:
basket_urls.append(f"https://www.srg.cosmos.ru{href}")
# Add URLs to DataFrame
if len(basket_urls) == len(df_basket):
df_basket["url"] = basket_urls
else:
# fallback: fill with empty strings if mismatch
df_basket["url"] = basket_urls + [""] * (len(df_basket) - len(basket_urls))
# Map title to url, sort by title
vals = dict(sorted(zip(df_basket["title"], df_basket["url"]), key=lambda x: x[0].lower()))
return vals
def _triton_read_table(session: requests.Session, page_download: str) -> pd.DataFrame:
resp = session.get(page_download)
resp.raise_for_status()
csv_bytes: bytes = resp.content
df = pd.read_csv(
io.BytesIO(csv_bytes),
dtype={
"RA": "float64",
"Dec": "float64",
"objid": "int64",
"time_spent": "string",
"Redshift_str": "string",
"Redshift_err": "string",
"Status": "string",
},
na_values=["", "None"],
keep_default_na=True,
)
df = clean_names(df)
return df
def _triton_parse_src_urls(session: requests.Session, program_url: str) -> dict[str, str]:
"""
Returns a dictionary mapping source names to their URLs for each row in the program table.
Only <a> tags without a class and with 'show' in href are considered, matching the R logic.
Parameters
----------
session : requests.Session
An authenticated requests.Session object.
program_url : str
The URL of the program page to parse.
Returns
-------
dict[str, str]
A dictionary where keys are source names and values are their corresponding URLs.
"""
resp = session.get(program_url)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")
# Find all <a> inside <td> inside .col-sm-12, no class, href contains 'show'
anchors = soup.select(".col-sm-12 td a:not([class])")
name_url: dict[str, str] = {}
for a in anchors:
href = a.get("href", "")
if "show" in href:
name = a.get_text(strip=True)
full_url = requests.compat.urljoin("https://www.srg.cosmos.ru", href)
name_url[name] = full_url
return name_url
def get_program(session: requests.Session, program: str = "SRGA") -> pd.DataFrame:
"""
Download and parse a program table from the Triton system.
Parameters
----------
session : requests.Session
An authenticated requests.Session object.
program : str, optional
The name of the program to download (case-insensitive). Default is "SRGA".
Returns
-------
pd.DataFrame
A DataFrame containing the program table with cleaned column names.
Columns include at least 'name', 'type', 'url', and, if astropy is installed, 'skycoord'.
Raises
------
ValueError
If the specified program is not found among available programs.
Example
-------
>>> sess = triton_session("bob", keyring.get_password("MYTOKEN", ""))
>>> df = get_program(sess, "SRGA")
>>> print(df.head())
"""
with console.status("Check available programs", spinner="dots"):
programs = list_programs(session)
programs_lc = {k.lower(): v for k, v in programs.items()}
console.print("[green]✔[/green] Check available programs")
prog_key = program.lower()
if prog_key not in programs_lc:
raise ValueError(f"Program {program} not found. Choose one of: {list(programs.keys())}")
with console.status(f"Parse URLs from the [green]{program}[/green] program", spinner="dots"):
src_vec = _triton_parse_src_urls(session, programs_lc[prog_key])
console.print(f"[green]✔[/green] Parse URLs from the [green]{program}[/green] program")
download_pages: dict[str, str] = {}
for name, url in programs.items():
if "program" in url:
download_pages[name.lower()] = url.replace("show", "download")
else:
download_pages[name.lower()] = "https://www.srg.cosmos.ru/triton/download"
with console.status(f"Downloading table from [green]{program}[/green]", spinner="dots"):
df = _triton_read_table(session, download_pages[prog_key])
console.print(f"[green]✔[/green] Downloading table from [green]{program}[/green]")
df["type"] = df["observernotes"].str.extract(r"type:\s?(.*)\n?", expand=False)
df["url"] = df["name"].map(src_vec).astype("string")
try:
from astropy.coordinates import SkyCoord
df["skycoord"] = SkyCoord(df['ra'], df['dec'], frame='icrs', unit='deg')
except ImportError:
pass
console.print("[green]✔[/green] Finished")
return df
def get_basket(session: requests.Session, basket: str = "") -> pd.DataFrame:
"""
Download and parse a basket table from the Triton system.
Parameters
----------
session : requests.Session
An authenticated requests.Session object.
basket : str, optional
The name of the basket to download (case-insensitive).
Returns
-------
pd.DataFrame
A DataFrame containing the basket table with cleaned column names and a 'skycoord' column if astropy is installed.
Raises
------
ValueError
If the specified basket is not found among available baskets.
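Example
-------
>>> sess = triton_session("bob", keyring.get_password("PLAN_SRG", ""))
>>> df_basket = get_basket(sess, basket="ART-XC agns")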
"""
with console.status("Check available baskets", spinner="dots"):
baskets = list_baskets(session)
baskets_lc = {k.lower(): v for k, v in baskets.items()}
console.print("[green]✔[/green] Check available baskets")
basket_key = basket.lower()
if basket_key not in baskets_lc:
raise ValueError(f"Basket {basket} not found. Choose one of: {list(baskets.keys())}")
with console.status(f"Parse basket table from [green]{basket}[/green]", spinner="dots"):
url = baskets_lc[basket_key]
resp = session.get(url)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")
tables = soup.select(".table")
if not tables:
raise RuntimeError("No tables found on basket page")
df_basket = pd.read_html(io.StringIO(str(tables[0])))[0]
df_basket = clean_names(df_basket)
console.print(f"[green]✔[/green] Parse basket table from [green]{basket}[/green]")
try:
from astropy.coordinates import SkyCoord
df_basket['skycoord'] = df_basket.apply(
lambda r: SkyCoord(f"{r['rahms']} {r['decdms']}", unit=("hourangle", "deg"), frame="icrs"),
axis=1
)
except ImportError:
pass
console.print("[green]✔[/green] Finished")
return df_basket

0
tests/__init__.py Normal file