diff --git a/src/librarian/api/__init__.py b/src/librarian/api/__init__.py index 57158d2..9331532 100644 --- a/src/librarian/api/__init__.py +++ b/src/librarian/api/__init__.py @@ -35,7 +35,6 @@ def get_session(): SessionDep = Annotated[Session, Depends(get_session)] config: LibrarianConfig = LibrarianConfig() -llm_extractor: LLMExtractor | None = None @librarian_app.on_event("startup") @@ -44,8 +43,6 @@ def on_startup(): global llm_extractor create_db_and_tables() - llm_extractor = LLMExtractor( + librarian_app.state.llm_extractor = LLMExtractor( config.llm_api_key, config.llm_model, config.llm_base_path ) - - diff --git a/src/librarian/api/internal/crossref.py b/src/librarian/api/internal/crossref.py index be07293..a5c6aed 100644 --- a/src/librarian/api/internal/crossref.py +++ b/src/librarian/api/internal/crossref.py @@ -50,16 +50,23 @@ class CrossRef: def _parse_references(self, references: dict, depth: int) -> list[PaperInfo]: parsed_refs: list[PaperInfo] = [] for ref in references: + ref_info: PaperInfo | None = None doi = ref.get("DOI", None) title = ref.get("article-title", None) author = ref.get("author", None) + if doi: ref_info: PaperInfo = self.search(doi=doi, depth=depth - 1) elif title and author: - ref_info: PaperInfo = self.search( + possible_ref: PaperInfo = self.search( title=title, author=author, depth=depth - 1 ) - else: + # Sometimes if it's not an article it returns out of topic articles + # Just checking for the title matching to check if it's somewhat equal + if possible_ref.title == title: + ref_info = possible_ref + + if not ref_info: authors = [author] if author else [] publish_date = None if ref.get("year", 0) == 0 else ref["year"] published_in = ref.get("journal-title", None) @@ -72,7 +79,9 @@ class CrossRef: doi=doi, references=[], ) - parsed_refs.append(ref_info) + + if ref_info.title or ref_info.doi: + parsed_refs.append(ref_info) return parsed_refs def _parse_item(self, item: dict, depth: int) -> PaperInfo: @@ -92,6 +101,13 @@ class CrossRef: if date_parts and isinstance(date_parts[0], list) and date_parts[0]: publish_year = date_parts[0][0] + published_in = item.get("container-title", None) + if isinstance(published_in, list): + if len(published_in) > 0: + published_in = published_in[0] + else: + published_in = None + references = [] if depth > 0: references = self._parse_references(item.get("reference"), depth) @@ -101,7 +117,7 @@ class CrossRef: authors=authors, first_author=first_author, publish_date=int(publish_year) if publish_year else None, - published_in=item.get("container-title", [None])[0], + published_in=str(published_in), references=references, doi=item.get("DOI"), ) diff --git a/src/librarian/api/routers/document.py b/src/librarian/api/routers/document.py index 21ba793..4264e14 100644 --- a/src/librarian/api/routers/document.py +++ b/src/librarian/api/routers/document.py @@ -45,7 +45,6 @@ def _create_document_file(session: Session, filename: str, sha512: str) -> Docum def _create_document_info( session: Session, file_id: int | None, paper_info: PaperInfo ) -> DocumentInfo: - print(paper_info) doc_info = DocumentInfo( title=paper_info.title, publish_date=paper_info.publish_date, diff --git a/src/librarian/frontend/routers/index.py b/src/librarian/frontend/routers/index.py index 9e2b149..78c325b 100644 --- a/src/librarian/frontend/routers/index.py +++ b/src/librarian/frontend/routers/index.py @@ -8,5 +8,5 @@ router = APIRouter() @router.get("/") async def serve_index(request: Request) -> HTMLResponse: - return templates_folder.TemplateResponse(request=request, name="base.html") + return templates_folder.TemplateResponse(request=request, name="documents.html") diff --git a/src/librarian/frontend/static/js/documents.js b/src/librarian/frontend/static/js/documents.js new file mode 100644 index 0000000..e69de29 diff --git a/src/librarian/frontend/templates/base.html b/src/librarian/frontend/templates/base.html index 75d7d06..dc34c05 100644 --- a/src/librarian/frontend/templates/base.html +++ b/src/librarian/frontend/templates/base.html @@ -1,13 +1,19 @@ - + + + {% block head %} + + + {% block title %}{% endblock %} + + + {% endblock %} + - - - - - - - -{% include 'sidebar.html' %} - + + {% block content %}{% endblock %} + diff --git a/src/librarian/frontend/templates/documents.html b/src/librarian/frontend/templates/documents.html new file mode 100644 index 0000000..8597162 --- /dev/null +++ b/src/librarian/frontend/templates/documents.html @@ -0,0 +1,77 @@ +{% extends "base.html" %} +{% block title %}Document Index{% endblock %} +{% block content %} +
+

📚 Files

+ + + + + + + + +
+ + + + + + + + + + + + + + +
TitleFirst AuthorPublished InYearDOIAuthors
+
+
+ + +{% endblock %} diff --git a/src/librarian/frontend/templates/sidebar.html b/src/librarian/frontend/templates/sidebar.html deleted file mode 100644 index 7de9a4f..0000000 --- a/src/librarian/frontend/templates/sidebar.html +++ /dev/null @@ -1,554 +0,0 @@ - - -
-
-
-
-

- -

-
-
-

- -

-
-
-

- -

-
-
-
-

- -

-
-
-
-

- -

-
-
-

- -

-
-
-

- -

-
-
-

- -

-
-
-
-

- -

-
-
-
-

- -

-
-
-

- -

-
-
-

- -

-
-
-

- -

-
-
-
-